summaryrefslogtreecommitdiff
path: root/bstruct.scm
diff options
context:
space:
mode:
Diffstat (limited to 'bstruct.scm')
-rw-r--r--bstruct.scm1141
1 files changed, 1141 insertions, 0 deletions
diff --git a/bstruct.scm b/bstruct.scm
new file mode 100644
index 0000000..529aa4d
--- /dev/null
+++ b/bstruct.scm
@@ -0,0 +1,1141 @@
+;;; guile-bstruct -- Binary structures for Guile
+;;; Copyright © 2024 David Thompson <dthompson2@worcester.edu>
+;;;
+;;; Licensed under the Apache License, Version 2.0 (the "License");
+;;; you may not use this file except in compliance with the License.
+;;; You may obtain a copy of the License at
+;;;
+;;; http://www.apache.org/licenses/LICENSE-2.0
+;;;
+;;; Unless required by applicable law or agreed to in writing, software
+;;; distributed under the License is distributed on an "AS IS" BASIS,
+;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+;;; See the License for the specific language governing permissions and
+;;; limitations under the License.
+
+;;; Commentary:
+;;
+;; A bstruct is a data type that encapsulates a bytevector and an
+;; offset which interprets that bytevector based on a given layout.
+;; Bstructs are useful for manipulating C structs when using the FFI,
+;; or interpreting/manipulating packed binary data such as a GPU
+;; vertex buffer, or for data types that benefit from unboxed math
+;; optimizations such as vectors and matrices.
+;;
+;; Inspired by guile-opengl's define-packed-struct and based on
+;; "Ftypes: Structured foreign types" by Andy Keep and R. Kent Dybvig.
+;;
+;; http://scheme2011.ucombinator.org/papers/Keep2011.pdf
+;;
+;; Features:
+;;
+;; - Efficiency through macro magic. Procedural macros generate tasty
+;; code for the compiler to gnaw on and emit efficient bytecode.
+;; Runtime checks are reduced to the bare minimum.
+;;
+;; - Raw bytevector access mode. Easily read/write structured data
+;; from/to raw bytevectors without going through an intermediary
+;; struct. Very useful for batch processing in situations when the
+;; overhead of creating wrapper structs would hinder throughput.
+;;
+;; - Cache friendly. In performance sensitive code, many bstructs can
+;; be stored in contiguous memory by pre-allocating a large bytevector
+;; for the underlying storage. Individual bstruct handles simply
+;; point at different offsets.
+;;
+;;; Code:
+
+(define-module (bstruct)
+ #:use-module (ice-9 exceptions)
+ #:use-module (ice-9 match)
+ #:use-module (rnrs bytevectors)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-9)
+ #:use-module (srfi srfi-11)
+ #:use-module (system base target)
+ #:use-module ((system foreign) #:prefix ffi:)
+ #:use-module (system syntax)
+ #:export (bstruct?
+ bstruct=?
+ bstruct-type
+ bstruct->sexp
+ bstruct->pointer
+ pointer->bstruct
+ bstruct-wrap
+ bstruct-unwrap
+ bstruct-alloc
+ bstruct-sizeof
+ bstruct-alignof
+ bstruct-ref
+ bstruct-&ref
+ bstruct-set!
+ bstruct-unpack
+ bstruct-pack!
+ bstruct-copy
+ bstruct-copy!
+ define-bstruct))
+
+(define-record-type <scalar>
+ (make-scalar size alignment native? type)
+ scalar?
+ (size scalar-size)
+ (alignment scalar-alignment)
+ (native? scalar-native?) ; native endianness?
+ (type scalar-type))
+
+(define-record-type <struct-field>
+ (make-struct-field name offset type)
+ struct-field?
+ (name struct-field-name)
+ (offset struct-field-offset)
+ (type struct-field-type))
+
+(define-record-type <struct>
+ (make-struct size alignment fields)
+ %struct?
+ (size struct-size)
+ (alignment struct-alignment)
+ (fields struct-fields))
+
+(define-record-type <union-field>
+ (make-union-field name type)
+ union-field?
+ (name union-field-name)
+ (type union-field-type))
+
+(define-record-type <union>
+ (make-union size alignment fields)
+ union?
+ (size union-size)
+ (alignment union-alignment)
+ (fields union-fields))
+
+(define-record-type <array>
+ (make-array size alignment length type)
+ array?
+ (size array-size)
+ (alignment array-alignment)
+ (length array-length)
+ (type array-type))
+
+(define-record-type <pointer>
+ (make-pointer size alignment type)
+ pointer?
+ (size pointer-size)
+ (alignment pointer-alignment)
+ ;; Mutable for recursive types.
+ (type pointer-type set-pointer-type!))
+
+;; TODO: functions
+;; TODO: bitfields
+
+(define (struct-field-ref struct name)
+ (and=> (find (match-lambda
+ (($ <struct-field> name*)
+ (eq? name name*)))
+ (struct-fields struct))
+ struct-field-type))
+
+(define (union-field-ref struct name)
+ (and=> (find (match-lambda
+ (($ <union-field> name*)
+ (eq? name name*)))
+ (union-fields struct))
+ union-field-type))
+
+(define (bstruct-type? obj)
+ (or (scalar? obj)
+ (%struct? obj)
+ (union? obj)
+ (array? obj)
+ (pointer? obj)))
+
+(define (sizeof type)
+ (match type
+ ((or ($ <scalar> size)
+ ($ <struct> size)
+ ($ <union> size)
+ ($ <array> size)
+ ($ <pointer> size))
+ size)))
+
+(define (alignof type)
+ (match type
+ ((or ($ <scalar> _ alignment)
+ ($ <struct> _ alignment)
+ ($ <union> _ alignment)
+ ($ <array> _ alignment)
+ ($ <pointer> _ alignment))
+ alignment)))
+
+(define-syntax-rule (assert expr who)
+ (unless expr
+ (raise-exception
+ (make-exception (make-assertion-failure)
+ (make-exception-with-origin who)
+ (make-exception-with-irritants '(expr))))))
+
+;; Bstructs form a shallow vtable hierarchy.
+(define <bstruct-descriptor>
+ (make-vtable (string-append standard-vtable-fields "pwpw")
+ (lambda (desc port)
+ (format port "#<bstruct-descriptor ~a>"
+ (object-address desc)))))
+
+(define (bstruct-descriptor-name descriptor)
+ (struct-ref descriptor vtable-offset-user))
+
+(define (bstruct-descriptor-type descriptor)
+ (struct-ref descriptor (+ vtable-offset-user 1)))
+
+(define (bstruct-descriptor-sizeof descriptor)
+ (sizeof (bstruct-descriptor-type descriptor)))
+
+(define (print-bstruct bs port)
+ (format port "#<~a ~a>"
+ (bstruct-descriptor-name (struct-vtable bs))
+ (bstruct->sexp bs)))
+
+(define* (make-bstruct-descriptor name type #:key (printer print-bstruct))
+ (assert (bstruct-type? type) 'make-bstruct-descriptor)
+ (make-struct/no-tail <bstruct-descriptor>
+ (make-struct-layout "pwpw")
+ printer name type))
+
+(define (bstruct-descriptor? obj)
+ (and (struct? obj) (eq? (struct-vtable obj) <bstruct-descriptor>)))
+
+(define (%bstruct? obj)
+ (and (struct? obj) (bstruct-descriptor? (struct-vtable obj))))
+
+(define (bstruct-bytevector bs)
+ (assert (%bstruct? bs) 'bstruct-bytevector)
+ (struct-ref bs 0))
+
+(define (bstruct-offset bs)
+ (assert (%bstruct? bs) 'bstruct-bytevector)
+ (struct-ref bs 1))
+
+(define (bstruct-type bs)
+ (assert (%bstruct? bs) 'bstruct-bytevector)
+ (bstruct-descriptor-type (struct-vtable bs)))
+
+;; Bstructs are composed of a type descriptor, a bytevector that
+;; provides storage, and an offset pointing to the start of the struct
+;; data within that bytevector.
+(define (%make-bstruct descriptor bv offset)
+ (make-struct/no-tail descriptor bv offset))
+
+(define (make-bstruct descriptor bv offset)
+ (assert (bstruct-descriptor? descriptor) 'make-bstruct)
+ (assert (bytevector? bv) 'make-bstruct)
+ (assert (exact-integer? offset) 'make-bstruct)
+ (assert (>= offset 0) 'make-bstruct)
+ (assert (<= (+ offset (bstruct-descriptor-sizeof descriptor))
+ (bytevector-length bv))
+ 'make-bstruct)
+ (%make-bstruct descriptor bv offset))
+
+;; Platform ABI details
+(define (non-target-endianness)
+ (if (eq? (target-endianness) (endianness little))
+ (endianness big)
+ (endianness little)))
+
+(define (sizeof* type)
+ (match type
+ ((or 'u8 's8) 1)
+ ((or 'u16 's16) 2)
+ ((or 'u32 's32 'f32) 4)
+ ((or 'u64 's64 'f64) 8)
+ (_
+ (if (string=? %host-type (target-type))
+ (match type
+ ('uint (ffi:sizeof ffi:unsigned-int))
+ ('int (ffi:sizeof ffi:int))
+ ('ulong (ffi:sizeof ffi:unsigned-long))
+ ('long (ffi:sizeof ffi:long))
+ ('ushort (ffi:sizeof ffi:unsigned-short))
+ ('short (ffi:sizeof ffi:short))
+ ('size_t (ffi:sizeof ffi:size_t))
+ ('ssize_t (ffi:sizeof ffi:ssize_t))
+ ('ptrdiff_t (ffi:sizeof ffi:ptrdiff_t))
+ ('intptr_t (ffi:sizeof ffi:intptr_t))
+ ('uintptr_t (ffi:sizeof ffi:uintptr_t)))
+ ;; FIXME: Fill in with proper ABI details. We will lazily
+ ;; evaluate this work when we hit a problem in a cross build.
+ (match type
+ ('uint 4)
+ ('int 4)
+ ('ulong 8)
+ ('long 8)
+ ('ushort 2)
+ ('short 2)
+ ('size_t (target-word-size))
+ ('ssize_t (target-word-size))
+ ('ptrdiff_t (target-word-size))
+ ('intptr_t (target-word-size))
+ ('uintptr_t (target-word-size)))))))
+
+(define (alignof* type)
+ (match type
+ ((or 'u8 's8) 1)
+ ((or 'u16 's16) 2)
+ ((or 'u32 's32 'f32) 4)
+ ((or 'u64 's64 'f64) 8)
+ (_
+ (if (string=? %host-type (target-type))
+ (match type
+ ('uint (ffi:sizeof ffi:unsigned-int))
+ ('int (ffi:sizeof ffi:int))
+ ('ulong (ffi:sizeof ffi:unsigned-long))
+ ('long (ffi:sizeof ffi:long))
+ ('ushort (ffi:sizeof ffi:unsigned-short))
+ ('short (ffi:sizeof ffi:short))
+ ('size_t (ffi:sizeof ffi:size_t))
+ ('ssize_t (ffi:sizeof ffi:ssize_t))
+ ('ptrdiff_t (ffi:sizeof ffi:ptrdiff_t))
+ ('intptr_t (ffi:sizeof ffi:intptr_t))
+ ('uintptr_t (ffi:sizeof ffi:uintptr_t))
+ ('* (ffi:sizeof '*)))
+ (match type
+ ('uint 4)
+ ('int 4)
+ ('ulong 8)
+ ('long 8)
+ ('ushort 2)
+ ('short 2)
+ ('size_t (target-word-size))
+ ('ssize_t (target-word-size))
+ ('ptrdiff_t (target-word-size))
+ ('intptr_t (target-word-size))
+ ('uintptr_t (target-word-size))
+ ('* (target-word-size)))))))
+
+;; It is useful to see bstructs in s-expression form when working
+;; at the REPL.
+(define (bstruct->sexp bs)
+ (let ((bv (bstruct-bytevector bs)))
+ (let loop ((type (bstruct-type bs)) (offset (bstruct-offset bs)))
+ (match type
+ ((? bstruct-descriptor? desc)
+ (loop (bstruct-descriptor-type desc) offset))
+ (($ <scalar> _ _ native? type)
+ (let ((e (if native? (target-endianness) (non-target-endianness))))
+ (match type
+ ('u8 (bytevector-u8-ref bv offset))
+ ('s8 (bytevector-s8-ref bv offset))
+ ('u16 (bytevector-u16-ref bv offset e))
+ ('s16 (bytevector-s16-ref bv offset e))
+ ('u32 (bytevector-u32-ref bv offset e))
+ ('s32 (bytevector-s32-ref bv offset e))
+ ('u64 (bytevector-u64-ref bv offset e))
+ ('s64 (bytevector-s64-ref bv offset e))
+ ('f32 (bytevector-ieee-single-ref bv offset e))
+ ('f64 (bytevector-ieee-double-ref bv offset e))
+ ('uint (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-int)))
+ ('int (bytevector-sint-ref bv offset e (ffi:sizeof ffi:int)))
+ ('ulong (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-long)))
+ ('long (bytevector-sint-ref bv offset e (ffi:sizeof ffi:long)))
+ ('ushort (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-short)))
+ ('short (bytevector-sint-ref bv offset e (ffi:sizeof ffi:short)))
+ ('size_t (bytevector-uint-ref bv offset e (ffi:sizeof ffi:size_t)))
+ ('ssize_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:ssize_t)))
+ ('ptrdiff_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:ptrdiff_t)))
+ ('intptr_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:intptr_t)))
+ ('uintptr_t (bytevector-uint-ref bv offset e (ffi:sizeof ffi:uintptr_t))))))
+ (($ <struct> _ _ fields)
+ `(struct ,@(map (match-lambda
+ (($ <struct-field> name offset* type)
+ (list name (loop type (+ offset offset*)))))
+ fields)))
+ (($ <union> _ _ fields)
+ `(union ,@(map (match-lambda
+ (($ <union-field> name type)
+ (list name (loop type offset))))
+ fields)))
+ (($ <array> _ _ length type)
+ (let ((size (sizeof type)))
+ `(array ,@(map (lambda (i)
+ (loop type (+ offset (* i size))))
+ (iota length)))))
+ (($ <pointer> _ _ type)
+ (match (bytevector-uint-ref bv offset (native-endianness)
+ (ffi:sizeof '*))
+ (0 'null)
+ ;; TODO: Dereference pointer. Needs cycle detection.
+ (address `(* ,address))))))))
+
+;; Macro helpers that use metadata attached to bstruct syntax
+;; transformers.
+(define (bstruct-descriptor-identifier? id)
+ (and (identifier? id)
+ (let-values (((kind val) (syntax-local-binding id)))
+ (and (eq? kind 'macro)
+ (procedure-property val 'bstruct?)))))
+
+(define (bstruct-descriptor-identifier-size id)
+ (let-values (((_ transformer) (syntax-local-binding id)))
+ (procedure-property transformer 'bstruct-size)))
+
+(define (bstruct-descriptor-identifier-alignment id)
+ (let-values (((_ transformer) (syntax-local-binding id)))
+ (procedure-property transformer 'bstruct-alignment)))
+
+(define-syntax bstruct?
+ (lambda (stx)
+ (syntax-case stx ()
+ (x
+ (identifier? #'x)
+ #'(case-lambda
+ ((obj) (%bstruct? obj))
+ ((<type> obj)
+ (match obj
+ (($ <type>) #t)
+ (_ #f)))))
+ ((_ obj)
+ #'(%bstruct? obj))
+ ((_ <type> obj)
+ (bstruct-descriptor-identifier? #'<type>)
+ #'(match obj
+ (($ <type>) #t)
+ (_ #f))))))
+
+(define-syntax bstruct=?
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> a b)
+ (bstruct-descriptor-identifier? #'<type>)
+ #'(match a
+ (($ <type> bv-a offset-a)
+ (match b
+ (($ <type> bv-b offset-b)
+ (let ((n (bstruct-sizeof <type>)))
+ (let loop ((i 0))
+ (cond
+ ((= i n) #t)
+ ((= (bytevector-u8-ref bv-a (+ offset-a i))
+ (bytevector-u8-ref bv-b (+ offset-b i)))
+ (loop (+ i 1)))
+ (else #f))))))))))))
+
+(define-syntax bstruct->pointer
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> bs)
+ (bstruct-descriptor-identifier? #'<type>)
+ #'(match bs
+ (($ <type> bv offset)
+ (ffi:bytevector->pointer bv offset)))))))
+
+(define-syntax pointer->bstruct
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> ptr)
+ (bstruct-descriptor-identifier? #'<type>)
+ #'(let ((size (bstruct-sizeof <type>)))
+ (make-bstruct <type> (ffi:pointer->bytevector ptr size) 0))))))
+
+;; Wrap a bytevector in a bstruct.
+(define-syntax bstruct-wrap
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> bv offset)
+ (bstruct-descriptor-identifier? #'<type>)
+ #'(make-bstruct <type> bv offset)))))
+
+;; Unwrap a bstruct to a bytevector + offset.
+(define-syntax bstruct-unwrap
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> bs)
+ (bstruct-descriptor-identifier? #'<type>)
+ #`(match bs
+ (($ <type> bv offset)
+ (values bv offset)))))))
+
+;; Size/align queries.
+(define-syntax bstruct-sizeof
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type>)
+ (bstruct-descriptor-identifier? #'<type>)
+ (bstruct-descriptor-identifier-size #'<type>)))))
+
+(define-syntax bstruct-alignof
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type>)
+ (bstruct-descriptor-identifier? #'<type>)
+ (bstruct-descriptor-identifier-alignment #'<type>)))))
+
+;; 'bstruct-pack!' and 'bstruct-unpack' allow for directly
+;; interpreting bytevector contents as structured data.
+(define-syntax bstruct-pack!
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ (<type> (elem val) ...) bv i)
+ (bstruct-descriptor-identifier? #'<type>)
+ #`(begin
+ #,@(map (lambda (elem val)
+ (syntax-case elem ()
+ ((e ...)
+ #`((<type> setter e ...) bv i #,val))
+ (e
+ #`((<type> setter e) bv i #,val))))
+ #'(elem ...)
+ #'(val ...)))))))
+
+(define-syntax bstruct-unpack
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ (<type> elem ...) bv i)
+ (bstruct-descriptor-identifier? #'<type>)
+ #`(values
+ #,@(map (lambda (elem)
+ (syntax-case elem ()
+ ((e ...)
+ #'((<type> getter e ...) bv i))
+ (e
+ #'((<type> getter e) bv i))))
+ #'(elem ...)))))))
+
+;; Allocate a fresh bstruct that wraps a fresh bytevector big
+;; enough to store the entire structure.
+(define-syntax bstruct-alloc
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> (elem val) ...)
+ (bstruct-descriptor-identifier? #'<type>)
+ (let ((size (bstruct-descriptor-identifier-size #'<type>)))
+ #`(let ((bv (make-bytevector #,size 0)))
+ (bstruct-pack! (<type> (elem val) ...) bv 0)
+ (bstruct-wrap <type> bv 0)))))))
+
+;; Return the value of an element.
+(define-syntax bstruct-ref
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> elem bs)
+ (bstruct-descriptor-identifier? #'<type>)
+ #`(match bs
+ ;; Using 'exact-integer?' here assists the type inference
+ ;; pass and allows for unboxed addition of offsets.
+ (($ <type> bv (? exact-integer? offset))
+ (bstruct-unpack (<type> elem) bv offset)))))))
+
+;; Create a pointer to some element within a bstruct.
+(define-syntax bstruct-&ref
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> (elem ...) bs)
+ (bstruct-descriptor-identifier? #'<type>)
+ #'(match bs
+ (($ <type> bv (? exact-integer? offset))
+ (call-with-values (lambda () ((<type> offset elem ...) bv offset))
+ (lambda (bv offset)
+ (ffi:bytevector->pointer bv offset))))))
+ ((_ <type> elem bs)
+ #'(bstruct-&ref <type> (elem) bs)))))
+
+;; Set the value of an element.
+(define-syntax bstruct-set!
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> elem bs x)
+ (bstruct-descriptor-identifier? #'<type>)
+ #'(match bs
+ (($ <type> bv (? exact-integer? offset))
+ (bstruct-pack! (<type> (elem x)) bv offset)))))))
+
+(define-syntax bstruct-copy!
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> src dst)
+ (bstruct-descriptor-identifier? #'<type>)
+ #'(match src
+ (($ <type> src-bv (? exact-integer? src-offset))
+ (match dst
+ (($ <type> dst-bv (? exact-integer? dst-offset))
+ (bytevector-copy! src-bv src-offset
+ dst-bv dst-offset
+ (bstruct-sizeof <type>))))))))))
+
+(define-syntax bstruct-copy
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ <type> src)
+ (bstruct-descriptor-identifier? #'<type>)
+ #'(let ((dst (bstruct-alloc <type>)))
+ (bstruct-copy! <type> src dst)
+ dst)))))
+
+(define (identifier-eq? stx sym)
+ (and (identifier? stx) (eq? (syntax->datum stx) sym)))
+
+;; The big gnarly procedural macro!
+(define-syntax define-bstruct
+ (lambda (stx)
+ (define primitives
+ '(u8 s8 u16 s16 u32 s32 u64 s64 f32 f64
+ int uint long ulong short ushort
+ size_t ssize_t ptrdiff_t intptr_t uintptr_t))
+ (define (target-endianness? e)
+ (eq? e (target-endianness)))
+ (define (resolve-endianness e)
+ (match e
+ ('native (target-endianness))
+ ('non-native (non-target-endianness))
+ (_ e)))
+ (define (identifier-memq? stx syms)
+ (and (identifier? stx) (memq (syntax->datum stx) syms)))
+ ;; Primitive getter/setter helpers
+ (define (ref/endianness proc endianness)
+ #`(lambda (bv i)
+ (#,proc bv i #,endianness)))
+ (define (set!/endianness proc endianness)
+ #`(lambda (bv i x)
+ (#,proc bv i x #,endianness)))
+ (define (uint-ref size endianness)
+ #`(lambda (bv i)
+ (bytevector-uint-ref bv i '#,(datum->syntax #f endianness) #,size)))
+ (define (uint-set! size endianness)
+ #`(lambda (bv i x)
+ (bytevector-uint-set! bv i x '#,(datum->syntax #f endianness) #,size)))
+ (define (sint-ref size endianness)
+ #`(lambda (bv i)
+ (bytevector-sint-ref bv i '#,(datum->syntax #f endianness) #,size)))
+ (define (sint-set! size endianness)
+ #`(lambda (bv i x)
+ (bytevector-sint-set! bv i x '#,(datum->syntax #f endianness) #,size)))
+ ;; Scalar types are divided into two categories: machine indepenent
+ ;; and machine dependent. The machine independent types (i32, f32,
+ ;; etc.) have a known size and alignment on all platforms. The
+ ;; machine dependent types have a size and alignment that can vary
+ ;; depending on the ABI of the system that is compiling the code.
+ (define (abi-ref type e) ; e for endianness
+ (match type
+ ('uint (uint-ref (sizeof* 'uint) e))
+ ('int (sint-ref (sizeof* 'int) e))
+ ('ulong (uint-ref (sizeof* 'ulong) e))
+ ('long (sint-ref (sizeof* 'long) e))
+ ('ushort (uint-ref (sizeof* 'ushort) e))
+ ('short (sint-ref (sizeof* 'short) e))
+ ('size_t (uint-ref (sizeof* 'size_t) e))
+ ('ssize_t (sint-ref (sizeof* 'ssize_t) e))
+ ('ptrdiff_t (sint-ref (sizeof* 'ptrdiff_t) e))
+ ('intptr_t (sint-ref (sizeof* 'intptr_t) e))
+ ('uintptr_t (uint-ref (sizeof* 'uintptr_t) e))))
+ (define (abi-set! type e)
+ (match type
+ ('uint (uint-set! (sizeof* 'uint) e))
+ ('int (sint-set! (sizeof* 'int) e))
+ ('ulong (uint-set! (sizeof* 'ulong) e))
+ ('long (sint-set! (sizeof* 'long) e))
+ ('ushort (uint-set! (sizeof* 'ushort) e))
+ ('short (sint-set! (sizeof* 'short) e))
+ ('size_t (uint-set! (sizeof* 'size_t) e))
+ ('ssize_t (sint-set! (sizeof* 'ssize_t) e))
+ ('ptrdiff_t (sint-set! (sizeof* 'ptrdiff_t) e))
+ ('intptr_t (sint-set! (sizeof* 'intptr_t) e))
+ ('uintptr_t (uint-set! (sizeof* 'uintptr_t) e))))
+ (define (primitive-getter size native? type)
+ (if native?
+ (match type
+ ('u8 #'bytevector-u8-ref)
+ ('s8 #'bytevector-s8-ref)
+ ('u16 #'bytevector-u16-native-ref)
+ ('s16 #'bytevector-s16-native-ref)
+ ('u32 #'bytevector-u32-native-ref)
+ ('s32 #'bytevector-s32-native-ref)
+ ('u64 #'bytevector-u64-native-ref)
+ ('s64 #'bytevector-s64-native-ref)
+ ('f32 #'bytevector-ieee-single-native-ref)
+ ('f64 #'bytevector-ieee-double-native-ref)
+ (_ (abi-ref type (target-endianness))))
+ (let ((e (non-target-endianness)))
+ (match type
+ ('u8 #'bytevector-u8-ref)
+ ('s8 #'bytevector-s8-ref)
+ ('u16 (ref/endianness #'bytevector-u16-ref e))
+ ('s16 (ref/endianness #'bytevector-s16-ref e))
+ ('u32 (ref/endianness #'bytevector-u32-ref e))
+ ('s32 (ref/endianness #'bytevector-s32-ref e))
+ ('u64 (ref/endianness #'bytevector-u64-ref e))
+ ('s64 (ref/endianness #'bytevector-s64-ref e))
+ ('f32 (ref/endianness #'bytevector-ieee-single-ref e))
+ ('f64 (ref/endianness #'bytevector-ieee-double-ref e))
+ (_ (abi-ref type e))))))
+ (define (primitive-setter size native? type)
+ (if native?
+ (match type
+ ('u8 #'bytevector-u8-set!)
+ ('s8 #'bytevector-s8-set!)
+ ('u16 #'bytevector-u16-native-set!)
+ ('s16 #'bytevector-s16-native-set!)
+ ('u32 #'bytevector-u32-native-set!)
+ ('s32 #'bytevector-s32-native-set!)
+ ('u64 #'bytevector-u64-native-set!)
+ ('s64 #'bytevector-s64-native-set!)
+ ('f32 #'bytevector-ieee-single-native-set!)
+ ('f64 #'bytevector-ieee-double-native-set!)
+ (_ (abi-set! type (target-endianness))))
+ (let ((e (non-target-endianness)))
+ (match type
+ ('u8 #'bytevector-u8-set!)
+ ('s8 #'bytevector-s8-set!)
+ ('u16 (set!/endianness #'bytevector-u16-set! e))
+ ('s16 (set!/endianness #'bytevector-s16-set! e))
+ ('u32 (set!/endianness #'bytevector-u32-set! e))
+ ('s32 (set!/endianness #'bytevector-s32-set! e))
+ ('u64 (set!/endianness #'bytevector-u64-set! e))
+ ('s64 (set!/endianness #'bytevector-s64-set! e))
+ ('f32 (set!/endianness #'bytevector-ieee-single-set! e))
+ ('f64 (set!/endianness #'bytevector-ieee-double-set! e))
+ (_ (abi-set! type e))))))
+ ;; Types can be recursive by referencing a type name within the
+ ;; same type group in a pointer expression.
+ ;;
+ ;; For example:
+ ;;
+ ;; (define-bstruct linked-list
+ ;; (struct (item int) (next (* linked-list))))
+ ;;
+ ;; To make this work, we keep a side table mapping type names to
+ ;; pointer type accessor forms that need to be patched with a
+ ;; cyclical reference *after* all the types are defined.
+ (define recursive-pointers (make-hash-table))
+ (define (sizeof type)
+ (match type
+ ((or ('scalar size _ _ _)
+ ('struct size _ _)
+ ('union size _ _)
+ ('array size _ _ _)
+ ('pointer size _ _))
+ size)
+ ((? bstruct-descriptor-identifier?)
+ (bstruct-descriptor-identifier-size type))))
+ (define (alignof type)
+ (match type
+ ((or ('scalar _ align _ _)
+ ('struct _ align _)
+ ('union _ align _)
+ ('array _ align _ _)
+ ('pointer _ align _))
+ align)
+ ((? bstruct-descriptor-identifier?)
+ (bstruct-descriptor-identifier-alignment type))))
+ (define (compute-type expr accessor type-ids packed? endianness)
+ (syntax-case expr ()
+ ;; Modifiers
+ ((packed expr)
+ (identifier-eq? #'packed 'packed)
+ (compute-type #'expr accessor type-ids #t endianness))
+ ((unpacked expr)
+ (identifier-eq? #'unpacked 'unpacked)
+ (compute-type #'expr accessor type-ids #f endianness))
+ ((endian e expr)
+ (and (identifier-eq? #'endian 'endian)
+ (identifier-memq? #'e '(native non-native big little)))
+ (compute-type #'expr accessor type-ids packed?
+ (resolve-endianness (syntax->datum #'e))))
+ ;; Previously defined types. 'packed?' and 'endianness' do
+ ;; not apply here.
+ (type-id
+ (bstruct-descriptor-identifier? #'type-id)
+ #'type-id)
+ ;; Primitive scalars
+ (primitive
+ (identifier-memq? #'primitive primitives)
+ (let ((type (syntax->datum #'primitive)))
+ `(scalar ,(sizeof* type)
+ ,(alignof* type)
+ ,(target-endianness? endianness)
+ ,type)))
+ ;; Structs
+ ((struct (field-name field-expr) ...)
+ (and (identifier-eq? #'struct 'struct)
+ (every identifier? #'(field-name ...)))
+ (let loop ((field-exprs #'((field-name field-expr) ...))
+ (fields '()) (offset 0) (alignment 0))
+ (syntax-case field-exprs ()
+ (()
+ ;; Round up to a multiple of alignment to get final
+ ;; size.
+ (let ((size (* (ceiling (/ offset alignment)) alignment)))
+ `(struct ,size ,alignment ,(reverse fields))))
+ ;; An underscore indicates a pseudo-field that is just
+ ;; for padding. It is not included in the struct field
+ ;; list and just adds to the offset.
+ (((underscore expr) . rest-exprs)
+ (identifier-eq? #'underscore '_)
+ (let ((type (compute-type #'expr #f type-ids packed? endianness)))
+ (loop #'rest-exprs fields (+ offset (sizeof type)) alignment)))
+ (((name expr) . rest-exprs)
+ (let* ((type (compute-type #'expr
+ #`(struct-field-ref #,accessor 'name)
+ type-ids packed? endianness))
+ (field-alignment (alignof type))
+ (padding (if packed?
+ 0
+ (modulo (- field-alignment
+ (modulo offset field-alignment))
+ field-alignment)))
+ (offset (+ offset padding))
+ (alignment (max alignment field-alignment)))
+ (loop #'rest-exprs
+ (cons (list (syntax->datum #'name) offset type) fields)
+ (+ offset (sizeof type))
+ alignment))))))
+ ;; Unions
+ ((union (field-name field-expr) ...)
+ (and (identifier-eq? #'union 'union)
+ (every identifier? #'(field-name ...)))
+ (let loop ((field-exprs #'((field-name field-expr) ...))
+ (fields '()) (size 0) (alignment 0))
+ (syntax-case field-exprs ()
+ (()
+ `(union ,size ,alignment ,(reverse fields)))
+ (((underscore expr) . rest-exprs)
+ (identifier-eq? #'underscore '_)
+ (let ((type (compute-type #'expr #f type-ids packed? endianness)))
+ (loop #'rest-exprs fields (max size (sizeof type)) alignment)))
+ (((name expr) . rest-exprs)
+ (let ((type (compute-type #'expr
+ #`(struct-field-ref #,accessor 'name)
+ type-ids packed? endianness)))
+ (loop #'rest-exprs
+ (cons (list (syntax->datum #'name) type) fields)
+ (max size (sizeof type))
+ (max alignment (alignof type))))))))
+ ;; Arrays
+ ((array length expr)
+ (and (identifier-eq? #'array 'array)
+ (exact-integer? (syntax->datum #'length)))
+ (let ((length (syntax->datum #'length))
+ (type (compute-type #'expr #`(array-type #,accessor)
+ type-ids packed? endianness)))
+ `(array ,(* (sizeof type) length) ,(alignof type) ,length ,type)))
+ ;; Pointers
+ ((pointer expr)
+ (identifier-eq? #'pointer '*)
+ (let ((size (ffi:sizeof '*))
+ (align (ffi:alignof '*)))
+ (let loop ((expr #'expr))
+ (syntax-case expr ()
+ ;; Void pointer
+ (void
+ (identifier-eq? #'void 'void)
+ `(pointer ,size ,align void))
+ ;; Primitive pointer
+ (prim
+ (identifier-memq? #'prim primitives)
+ `(pointer ,size ,align
+ ,(compute-type #'prim #f type-ids packed? endianness)))
+ ;; Pointer to a pointer
+ ((pointer expr)
+ (identifier-eq? #'pointer '*)
+ `(pointer ,size ,align ,(loop #'expr)))
+ ;; Recursive reference to a type within this type group.
+ (type-id
+ (any (lambda (id) (bound-identifier=? #'type-id id)) type-ids)
+ (let ((pointer `(pointer ,size ,align (recur ,#'type-id))))
+ ;; Make a note that the recursive reference needs to be
+ ;; made via mutation after all types in the group are
+ ;; defined.
+ (hashq-set! recursive-pointers
+ (syntax->datum #'type-id)
+ (cons accessor
+ (hashq-ref recursive-pointers
+ (syntax->datum #'type-id)
+ '())))
+ pointer))
+ ;; Reference to a type outside of this type group.
+ (type-id
+ (bstruct-descriptor-identifier? #'type-id)
+ `(pointer ,size ,align ,#'type-id))))))))
+ (define (type->syntax type)
+ (match type
+ ((? identifier? type)
+ type)
+ (('scalar size alignment native? type)
+ #`(make-scalar #,size #,alignment #,native? '#,(datum->syntax #f type)))
+ (('struct size alignment fields)
+ (let ((fields* (map (match-lambda
+ ((name offset type)
+ #`(make-struct-field '#,(datum->syntax #f name)
+ #,offset
+ #,(type->syntax type))))
+ fields)))
+ #`(make-struct #,size #,alignment (list #,@fields*))))
+ (('union size alignment fields)
+ (let ((fields* (map (match-lambda
+ ((name type)
+ #`(make-union-field '#,(datum->syntax #f name)
+ #,(type->syntax type))))
+ fields)))
+ #`(make-union #,size #,alignment (list #,@fields*))))
+ (('array size alignment length type)
+ #`(make-array #,size #,alignment #,length #,(type->syntax type)))
+ (('pointer size alignment ('recur type))
+ #`(make-pointer #,size #,alignment #f))
+ (('pointer size alignment 'void)
+ #`(make-pointer #,size #,alignment #f))
+ (('pointer size alignment type)
+ #`(make-pointer #,size #,alignment #,(type->syntax type)))))
+ (define (expand-accessor proc stx id op type group)
+ #`(syntax-case #,stx ()
+ (() #'#,(proc id 0)) ; self reference
+ ((elem :::)
+ #,(let loop ((stx #'#'(elem :::)) (type type) (offset 0))
+ (match type
+ ((? bstruct-descriptor-identifier? type)
+ ;; Recursively invoke macro for referenced type to produce
+ ;; the accessor.
+ #`(syntax-case #,stx ()
+ ((elem :::)
+ #'#,(proc #`(#,type #,(datum->syntax #f op) elem :::)
+ offset))))
+ (('scalar _ _ _ _)
+ #`(syntax-case #,stx ()
+ (() #'#,(proc type offset))))
+ (('struct _ _ fields)
+ #`(syntax-case #,stx ()
+ ((e elem :::)
+ (match (syntax->datum #'e)
+ #,@(map (match-lambda
+ ((name offset* type)
+ #`('#,(datum->syntax #f name)
+ #,(loop #'#'(elem :::) type
+ #`(+ #,offset #,offset*)))))
+ fields)
+ (_ #'(error "no such struct field" 'e))))))
+ (('union _ _ fields)
+ #`(syntax-case #,stx ()
+ ((e elem :::)
+ (match (syntax->datum #'e)
+ #,@(map (match-lambda
+ ((name type)
+ #`('#,(datum->syntax #f name)
+ #,(loop #'#'(elem :::) type offset))))
+ fields)
+ (_ #'(error "no such union field" 'e))))))
+ (('array _ _ length type)
+ ;; Need to generate a unique name here to capture
+ ;; the 'e' containing the array index expression as
+ ;; 'e' could get shadowed later.
+ (with-syntax (((i) (generate-temporaries '(i))))
+ #`(syntax-case #,stx ()
+ ((e elem :::)
+ (with-syntax ((i #'e))
+ #,(loop #'#'(elem :::)
+ type
+ #`(+ #,offset
+ (* (let ()
+ ;; if 'i' is a constant then
+ ;; these checks will be elided
+ ;; by the compiler.
+ (assert (exact-integer? i)
+ 'bstruct-accessor)
+ (assert (< -1 i #,length)
+ 'bstruct-accessor)
+ i)
+ #,(sizeof type)))))))))
+ ;; Void pointers can only be referenced, not
+ ;; dereferenced.
+ (('pointer _ _ 'void)
+ #`(syntax-case #,stx ()
+ (() #'#,(proc type offset))))
+ (('pointer _ _ type*)
+ (call-with-values (lambda ()
+ (match type*
+ (('recur type*)
+ (values type* #t))
+ (type*
+ (values type* #f))))
+ (lambda (type* recur?)
+ #`(let ()
+ (define (expand-pointer-accessor stx)
+ (syntax-case stx ()
+ ;; Pointer reference
+ (() #'#,(proc type offset))
+ ;; Pointer dereference. Sigh, it's
+ ;; complicated...
+ (((* index) elem :::)
+ (identifier-eq? #'* '*)
+ (let ((offset #,offset)
+ (e '#,(datum->syntax #f (target-endianness)))
+ (ptr-size #,(target-word-size))
+ ;; For recursive types, we don't
+ ;; yet have a defined macro to
+ ;; query for the size, so we have
+ ;; to look it up in the group
+ ;; alist.
+ (size #,(sizeof (if recur?
+ (assoc-ref group type*)
+ type*)))
+ (body #,(match type*
+ ((? syntax?)
+ #`#'#,(proc #`(#,type*
+ #,(datum->syntax #f op)
+ elem :::)
+ 0))
+ (_
+ (loop #'#'(elem :::) type* 0)))))
+ ;; 'bv' and 'i' are the lexical
+ ;; variables containing the
+ ;; bytevector and offset for
+ ;; getting/setting. Every time we
+ ;; encounter a pointer dereference we
+ ;; need to shadow the old variables
+ ;; with new ones.
+ #`(let* ((e '#,(datum->syntax #f e))
+ (base (bytevector-sint-ref bv (+ i #,offset)
+ e #,ptr-size))
+ (address (+ base (* #,size index)))
+ (ptr (ffi:make-pointer address))
+ (bv (ffi:pointer->bytevector ptr #,size))
+ (i 0))
+ #,body)))
+ ;; Pointer dereference with implied
+ ;; index of 0.
+ ((* elem :::)
+ (identifier-eq? #'* '*)
+ (expand-pointer-accessor #'((* 0) elem :::)))))
+ (expand-pointer-accessor #,stx))))))))))
+ (define (macro-for-type id type-id type group)
+ (define (self-identifier? other)
+ (eq? id other))
+ (let ((self-size (sizeof type)))
+ #`(define-syntax #,id
+ (with-ellipsis :::
+ (lambda (stx)
+ ;; Since syntax transformers are procedures, we can
+ ;; stash useful information in procedure properties that
+ ;; 'define-bstruct' can use if/when a bstruct type
+ ;; is referenced within another type definition.
+ #((bstruct? . #t)
+ (bstruct-size . #,(sizeof type))
+ (bstruct-alignment . #,(alignof type)))
+ (syntax-case stx ()
+ ;; Identifier syntax to provide the illusion that this
+ ;; macro is just an ordinary variable.
+ (self
+ (identifier? #'self)
+ #'#,type-id)
+ ;; Private interface for code generation.
+ ((_ offset elem :::)
+ (identifier-eq? #'offset 'offset)
+ #`(lambda (bv i)
+ #,#,(expand-accessor
+ (lambda (type offset)
+ (match type
+ ((and sub-offset ((? syntax?) ...))
+ #`(#,sub-offset bv (+ i #,offset)))
+ (_ #`(values bv #,offset))))
+ #'#'(elem :::) id 'offset type group)))
+ ((_ getter elem :::)
+ (identifier-eq? #'getter 'getter)
+ #`(lambda (bv i)
+ #,#,(expand-accessor
+ (lambda (type offset)
+ (match type
+ ((or (? self-identifier?)
+ (? bstruct-descriptor-identifier?))
+ #`(make-bstruct #,type bv (+ i #,offset)))
+ ((and sub-get ((? syntax?) ...))
+ #`(#,sub-get bv (+ i #,offset)))
+ (('pointer _ _ 'void)
+ #`(ffi:make-pointer
+ (bytevector-sint-ref bv (+ i #,offset)
+ '#,(datum->syntax
+ #f (target-endianness))
+ #,(target-word-size))))
+ (('pointer size _ (or ('recur type) type))
+ #`(ffi:make-pointer
+ (bytevector-sint-ref bv (+ i #,offset)
+ '#,(datum->syntax
+ #f (target-endianness))
+ #,(target-word-size))))
+ (('scalar size align native? type)
+ #`(#,(primitive-getter size native? type)
+ bv (+ i #,offset)))))
+ #'#'(elem :::) id 'getter type group)))
+ ((_ setter elem :::)
+ (identifier-eq? #'setter 'setter)
+ #`(lambda (bv i x)
+ #,#,(expand-accessor
+ (lambda (type offset)
+ (match type
+ ((? self-identifier?)
+ #`(match x
+ (($ #,type src j)
+ (bytevector-copy! src j bv
+ (+ i #,offset)
+ #,self-size))))
+ ((? bstruct-descriptor-identifier?)
+ #`(match x
+ (($ #,type src j)
+ (bytevector-copy! src j bv
+ (+ i #,offset)
+ #,(sizeof type)))))
+ ((and sub-set! ((? syntax?) ...))
+ #`(#,sub-set! bv (+ i #,offset) x))
+ (('pointer size _ 'void)
+ #`(bytevector-sint-set! bv (+ i #,offset)
+ (ffi:pointer-address x)
+ '#(datum->syntax
+ #f (target-endianness))
+ #,(target-word-size)))
+ (('pointer size _ (or ('recur type) type))
+ #`(bytevector-sint-set! bv (+ i #,offset)
+ (ffi:pointer-address x)
+ '#(datum->syntax
+ #f (target-endianness))
+ #,(target-word-size)))
+ (('scalar size align native? type)
+ (let ((setter (primitive-setter size native? type)))
+ #`(#,setter bv (+ i #,offset) x)))))
+ #'#'(elem :::) id 'setter type group)))))))))
+ (syntax-case stx ()
+ ;; Type group definition. Types defined in the same group can
+ ;; contain recursive pointer references to each other.
+ ((_ (id expr . kwargs) ...)
+ (not (null? #'((id expr) ...)))
+ (with-syntax (((type-id ...)
+ (map (lambda (stx)
+ (datum->syntax stx (gensym "bstruct-type-")))
+ #'(id ...))))
+ (let* ((ids #'(id ...))
+ (type-ids #'(type-id ...))
+ (kwargs #'(kwargs ...))
+ (types (map (lambda (id expr)
+ (compute-type expr
+ #`(bstruct-descriptor-type #,id)
+ ids #f (target-endianness)))
+ #'(id ...)
+ #'(expr ...)))
+ (group (map cons #'(id ...) types)))
+ #`(begin
+ ;; First, define the descriptors using gensym'd names.
+ #,@(map (lambda (id temp-id kwargs type)
+ #`(define #,temp-id
+ (make-bstruct-descriptor '#,id
+ #,(type->syntax type)
+ #,@kwargs)))
+ ids type-ids kwargs types)
+ ;; Then tie the knot for recursive pointer types.
+ #,@(append-map (lambda (id)
+ (map (lambda (accessor)
+ #`(set-pointer-type! #,accessor #,id))
+ (hashq-ref recursive-pointers
+ (syntax->datum id) '())))
+ ids)
+ ;; Finally, define macros using the true names that
+ ;; wrap the gensym'd variables.
+ #,@(map (lambda (id type-id type)
+ (macro-for-type id type-id type group))
+ ids type-ids types)))))
+ ;; A single type definition is a type group of one.
+ ((_ id expr . kwargs)
+ #'(define-bstruct (id expr . kwargs))))))