;;; guile-bstruct -- Binary structures for Guile ;;; Copyright © 2024 David Thompson ;;; ;;; Licensed under the Apache License, Version 2.0 (the "License"); ;;; you may not use this file except in compliance with the License. ;;; You may obtain a copy of the License at ;;; ;;; http://www.apache.org/licenses/LICENSE-2.0 ;;; ;;; Unless required by applicable law or agreed to in writing, software ;;; distributed under the License is distributed on an "AS IS" BASIS, ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ;;; See the License for the specific language governing permissions and ;;; limitations under the License. ;;; Commentary: ;; ;; A bstruct is a data type that encapsulates a bytevector and an ;; offset which interprets that bytevector based on a given layout. ;; Bstructs are useful for manipulating C structs when using the FFI, ;; or interpreting/manipulating packed binary data such as GPU vertex ;; buffers, or for data types that benefit from unboxed math ;; optimizations such as vectors and matrices. ;; ;; Inspired by guile-opengl's define-packed-struct and based on ;; "Ftypes: Structured foreign types" by Andy Keep and R. Kent Dybvig. ;; ;; http://scheme2011.ucombinator.org/papers/Keep2011.pdf ;; ;; Features: ;; ;; - Efficiency through macro magic. Procedural macros generate tasty ;; code for the compiler to munch on and emit efficient bytecode. ;; Runtime checks are reduced to the bare minimum. ;; ;; - Raw bytevector access mode. Easily read/write structured data ;; from/to raw bytevectors without going through an intermediary ;; struct. Very useful for batch processing in situations when the ;; overhead of creating wrapper structs would hinder throughput. ;; ;; - Cache friendly. In performance sensitive code, many bstructs can ;; be stored in contiguous memory by pre-allocating a large bytevector ;; for the underlying storage. Individual bstruct handles simply ;; point at different offsets. ;; ;;; Code: (define-module (bstruct) #:use-module (ice-9 exceptions) #:use-module (ice-9 match) #:use-module (rnrs bytevectors) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (srfi srfi-11) #:use-module (system base target) #:use-module ((system foreign) #:prefix ffi:) #:use-module (system syntax) #:export (bstruct? bstruct-=? bstruct-type bstruct-length bstruct->sexp bstruct->pointer pointer->bstruct bstruct-wrap bstruct-unwrap bstruct-alloc bstruct-sizeof bstruct-alignof bstruct-ref bstruct-set! bstruct-unpack bstruct-pack! bstruct-copy bstruct-copy! define-bstruct)) (define-record-type (make-scalar size alignment native? type) scalar? (size scalar-size) (alignment scalar-alignment) (native? scalar-native?) ; native endianness? (type scalar-type)) (define-record-type (make-struct-field name offset type) struct-field? (name struct-field-name) (offset struct-field-offset) (type struct-field-type)) (define-record-type (make-struct size alignment fields) %struct? (size struct-size) (alignment struct-alignment) (fields struct-fields)) (define-record-type (make-union-field name type) union-field? (name union-field-name) (type union-field-type)) (define-record-type (make-union size alignment fields) union? (size union-size) (alignment union-alignment) (fields union-fields)) (define-record-type (make-array size alignment length type) array? (size array-size) (alignment array-alignment) (length array-length) (type array-type)) (define-record-type (make-pointer size alignment type) pointer? (size pointer-size) (alignment pointer-alignment) ;; Mutable for recursive types. (type pointer-type set-pointer-type!)) (define-record-type (make-opaque) opaque?) ;; TODO: functions ;; TODO: bitfields (define (struct-field-ref struct name) (and=> (find (match-lambda (($ name*) (eq? name name*))) (struct-fields struct)) struct-field-type)) (define (union-field-ref struct name) (and=> (find (match-lambda (($ name*) (eq? name name*))) (union-fields struct)) union-field-type)) (define (bstruct-type? obj) (or (scalar? obj) (%struct? obj) (union? obj) (array? obj) (pointer? obj) (opaque? obj))) (define (sizeof type) (match type ((or ($ size) ($ size) ($ size) ($ size) ($ size)) size) (($ ) (raise-exception (make-exception (make-exception-with-message "cannot get size of opaque type") (make-exception-with-origin 'sizeof) (make-exception-with-irritants (list type))))))) (define (alignof type) (match type ((or ($ _ alignment) ($ _ alignment) ($ _ alignment) ($ _ alignment) ($ _ alignment)) alignment) (($ ) (raise-exception (make-exception (make-exception-with-message "cannot get alignment of opaque type") (make-exception-with-origin 'sizeof) (make-exception-with-irritants (list type))))))) (define-syntax-rule (assert expr who) (unless expr (raise-exception (make-exception (make-assertion-failure) (make-exception-with-origin who) (make-exception-with-irritants '(expr)))))) (define-syntax-rule (check-size i n who) (assert (and (>= i 0) (< i n)) who)) (define-inlinable (u64? x) (and (exact-integer? x) (<= 0 x (ash 1 64)))) ;; Bstructs form a shallow vtable hierarchy. (define (make-vtable (string-append standard-vtable-fields "pwpw") (lambda (desc port) (format port "#" (object-address desc))))) (define (bstruct-descriptor-name descriptor) (struct-ref descriptor vtable-offset-user)) (define (bstruct-descriptor-type descriptor) (struct-ref descriptor (+ vtable-offset-user 1))) (define (bstruct-descriptor-sizeof descriptor) (sizeof (bstruct-descriptor-type descriptor))) (define (print-bstruct bs port) (format port "#<~a ~a>" (bstruct-descriptor-name (struct-vtable bs)) (bstruct->sexp bs))) (define* (make-bstruct-descriptor name type #:key (printer print-bstruct)) (assert (bstruct-type? type) 'make-bstruct-descriptor) (make-struct/no-tail (make-struct-layout "pwpwpw") printer name type)) (define (bstruct-descriptor? obj) (and (struct? obj) (eq? (struct-vtable obj) ))) (define (%bstruct? obj) (and (struct? obj) (bstruct-descriptor? (struct-vtable obj)))) (define (bstruct-bytevector bs) (assert (%bstruct? bs) 'bstruct-bytevector) (struct-ref bs 0)) (define (bstruct-offset bs) (assert (%bstruct? bs) 'bstruct-bytevector) (struct-ref bs 1)) (define (bstruct-length bs) (assert (%bstruct? bs) 'bstruct-bytevector) (struct-ref bs 2)) (define (bstruct-type bs) (assert (%bstruct? bs) 'bstruct-bytevector) (bstruct-descriptor-type (struct-vtable bs))) ;; Bstructs are composed of a type descriptor, a bytevector that ;; provides storage, an offset pointing to the start of the struct ;; data within that bytevector, and the number of contiguous structs ;; within. ;; ;; TODO: We could use Guile's bytevector slices here, however they ;; were only introduced in 3.0.9 so we can't rely on them. (define (%make-bstruct descriptor bv offset n) (make-struct/no-tail descriptor bv offset n)) (define (make-bstruct descriptor bv offset n) (assert (bstruct-descriptor? descriptor) 'make-bstruct) (assert (bytevector? bv) 'make-bstruct) (assert (exact-integer? offset) 'make-bstruct) (assert (>= offset 0) 'make-bstruct) (assert (<= (+ offset (* (bstruct-descriptor-sizeof descriptor) n)) (bytevector-length bv)) 'make-bstruct) (%make-bstruct descriptor bv offset n)) ;; Platform ABI details (define (non-target-endianness) (if (eq? (target-endianness) (endianness little)) (endianness big) (endianness little))) (define (sizeof* type) (match type ((or 'u8 's8) 1) ((or 'u16 's16) 2) ((or 'u32 's32 'f32) 4) ((or 'u64 's64 'f64) 8) (_ (if (string=? %host-type (target-type)) (match type ('uint (ffi:sizeof ffi:unsigned-int)) ('int (ffi:sizeof ffi:int)) ('ulong (ffi:sizeof ffi:unsigned-long)) ('long (ffi:sizeof ffi:long)) ('ushort (ffi:sizeof ffi:unsigned-short)) ('short (ffi:sizeof ffi:short)) ('size_t (ffi:sizeof ffi:size_t)) ('ssize_t (ffi:sizeof ffi:ssize_t)) ('ptrdiff_t (ffi:sizeof ffi:ptrdiff_t)) ('intptr_t (ffi:sizeof ffi:intptr_t)) ('uintptr_t (ffi:sizeof ffi:uintptr_t))) ;; FIXME: Fill in with proper ABI details. We will lazily ;; evaluate this work when we hit a problem in a cross build. (match type ('uint 4) ('int 4) ('ulong 8) ('long 8) ('ushort 2) ('short 2) ('size_t (target-word-size)) ('ssize_t (target-word-size)) ('ptrdiff_t (target-word-size)) ('intptr_t (target-word-size)) ('uintptr_t (target-word-size))))))) (define (alignof* type) (match type ((or 'u8 's8) 1) ((or 'u16 's16) 2) ((or 'u32 's32 'f32) 4) ((or 'u64 's64 'f64) 8) (_ (if (string=? %host-type (target-type)) (match type ('uint (ffi:sizeof ffi:unsigned-int)) ('int (ffi:sizeof ffi:int)) ('ulong (ffi:sizeof ffi:unsigned-long)) ('long (ffi:sizeof ffi:long)) ('ushort (ffi:sizeof ffi:unsigned-short)) ('short (ffi:sizeof ffi:short)) ('size_t (ffi:sizeof ffi:size_t)) ('ssize_t (ffi:sizeof ffi:ssize_t)) ('ptrdiff_t (ffi:sizeof ffi:ptrdiff_t)) ('intptr_t (ffi:sizeof ffi:intptr_t)) ('uintptr_t (ffi:sizeof ffi:uintptr_t)) ('* (ffi:sizeof '*))) (match type ('uint 4) ('int 4) ('ulong 8) ('long 8) ('ushort 2) ('short 2) ('size_t (target-word-size)) ('ssize_t (target-word-size)) ('ptrdiff_t (target-word-size)) ('intptr_t (target-word-size)) ('uintptr_t (target-word-size)) ('* (target-word-size))))))) ;; It is useful to see bstructs in s-expression form when working ;; at the REPL. (define (bstruct->sexp bs) (let ((bv (bstruct-bytevector bs))) (let loop ((type (bstruct-type bs)) (offset (bstruct-offset bs))) (match type ((? bstruct-descriptor? desc) (loop (bstruct-descriptor-type desc) offset)) (($ _ _ native? type) (let ((e (if native? (target-endianness) (non-target-endianness)))) (match type ('u8 (bytevector-u8-ref bv offset)) ('s8 (bytevector-s8-ref bv offset)) ('u16 (bytevector-u16-ref bv offset e)) ('s16 (bytevector-s16-ref bv offset e)) ('u32 (bytevector-u32-ref bv offset e)) ('s32 (bytevector-s32-ref bv offset e)) ('u64 (bytevector-u64-ref bv offset e)) ('s64 (bytevector-s64-ref bv offset e)) ('f32 (bytevector-ieee-single-ref bv offset e)) ('f64 (bytevector-ieee-double-ref bv offset e)) ('uint (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-int))) ('int (bytevector-sint-ref bv offset e (ffi:sizeof ffi:int))) ('ulong (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-long))) ('long (bytevector-sint-ref bv offset e (ffi:sizeof ffi:long))) ('ushort (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-short))) ('short (bytevector-sint-ref bv offset e (ffi:sizeof ffi:short))) ('size_t (bytevector-uint-ref bv offset e (ffi:sizeof ffi:size_t))) ('ssize_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:ssize_t))) ('ptrdiff_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:ptrdiff_t))) ('intptr_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:intptr_t))) ('uintptr_t (bytevector-uint-ref bv offset e (ffi:sizeof ffi:uintptr_t)))))) (($ _ _ fields) `(struct ,@(map (match-lambda (($ name offset* type) (list name (loop type (+ offset offset*))))) fields))) (($ _ _ fields) `(union ,@(map (match-lambda (($ name type) (list name (loop type offset)))) fields))) (($ _ _ length type) (let ((size (sizeof type))) `(array ,@(map (lambda (i) (loop type (+ offset (* i size)))) (iota length))))) (($ ) `(* ,(bytevector-uint-ref bv offset (native-endianness) (ffi:sizeof '*)))))))) ;; Macro helpers that use metadata attached to bstruct syntax ;; transformers. (define (bstruct-descriptor-identifier? id) (and (identifier? id) (let-values (((kind val) (syntax-local-binding id))) (and (eq? kind 'macro) (procedure-property val 'bstruct?))))) (define (non-opaque-bstruct-descriptor-identifier? id) (and (identifier? id) (let-values (((kind val) (syntax-local-binding id))) (and (eq? kind 'macro) (procedure-property val 'bstruct?) (not (procedure-property val 'bstruct-opaque?)))))) (define (bstruct-descriptor-identifier-size id) (let-values (((_ transformer) (syntax-local-binding id))) (procedure-property transformer 'bstruct-size))) (define (bstruct-descriptor-identifier-alignment id) (let-values (((_ transformer) (syntax-local-binding id))) (procedure-property transformer 'bstruct-alignment))) ;; A predicate that can answer the questions: ;; 1) Is this *any kind* of bstruct? ;; 2) Is this *a specific kind* of bstruct? (define-syntax bstruct? (lambda (stx) (syntax-case stx () (x (identifier? #'x) #'(case-lambda ((obj) (%bstruct? obj)) (( obj) (match obj (($ ) #t) (_ #f))))) ((_ obj) #'(%bstruct? obj)) ((_ obj) (bstruct-descriptor-identifier? #') #'(match obj (($ ) #t) (_ #f)))))) (define-syntax bstruct-=? (lambda (stx) (syntax-case stx () ((_ a b) (bstruct-descriptor-identifier? #') #'(match a (($ bv-a offset-a) (match b (($ bv-b offset-b) (let ((n (bstruct-sizeof ))) (let loop ((i 0)) (cond ((= i n) #t) ((= (bytevector-u8-ref bv-a (+ offset-a i)) (bytevector-u8-ref bv-b (+ offset-b i))) (loop (+ i 1))) (else #f)))))))))))) ;; Create a pointer to the bstruct or some element within the bstruct. (define-syntax bstruct->pointer (lambda (stx) (syntax-case stx () ((_ ( i) bs) (bstruct-descriptor-identifier? #') #'(match bs (($ bv (? u64? offset)) (ffi:bytevector->pointer bv offset)))) ((_ ( i) bs (elem ...)) (bstruct-descriptor-identifier? #') #'(match bs (($ bv (? u64? offset) (? u64? n)) (check-size i n 'bstruct->pointer) (let ((offset (+ offset (* (bstruct-sizeof ) i)))) (call-with-values (lambda () (( offset elem ...) bv offset)) ffi:bytevector->pointer))))) ((_ ( i) bs elem) #'(bstruct->pointer ( i) bs (elem))) ((_ bs) #'(bstruct->pointer ( 0) bs)) ((_ bs (elem ...)) #'(bstruct->pointer ( 0) bs (elem ...))) ((_ bs elem) #'(bstruct->pointer ( 0) bs (elem)))))) (define-syntax pointer->bstruct (lambda (stx) (syntax-case stx () ((_ ptr n) (non-opaque-bstruct-descriptor-identifier? #') #'(let ((size (* (bstruct-sizeof ) n))) (make-bstruct (ffi:pointer->bytevector ptr size) 0 n))) ((_ ptr) #'(pointer->bstruct ptr 1))))) ;; Wrap a bytevector in a bstruct. (define-syntax bstruct-wrap (lambda (stx) (syntax-case stx () ((_ bv offset n) (non-opaque-bstruct-descriptor-identifier? #') #'(make-bstruct bv offset n)) ((_ bv offset) #'(bstruct-wrap bv offset 1))))) ;; Unwrap a bstruct to a bytevector + offset + count. (define-syntax bstruct-unwrap (lambda (stx) (syntax-case stx () ((_ bs) (bstruct-descriptor-identifier? #') #`(match bs (($ bv offset n) (values bv offset n))))))) ;; Size/align queries. (define-syntax bstruct-sizeof (lambda (stx) (syntax-case stx () ((_ ) (non-opaque-bstruct-descriptor-identifier? #') (bstruct-descriptor-identifier-size #'))))) (define-syntax bstruct-alignof (lambda (stx) (syntax-case stx () ((_ ) (non-opaque-bstruct-descriptor-identifier? #') (bstruct-descriptor-identifier-alignment #'))))) ;; 'bstruct-pack!' and 'bstruct-unpack' allow for directly ;; interpreting bytevector contents as structured data. (define-syntax bstruct-pack! (lambda (stx) (define (flatten-elems stx) (append-map (lambda (stx) (syntax-case stx (->) ((-> root-elem sub-elems ...) (identifier? #'root-elem) (map (lambda (stx) (syntax-case stx () (((sub-elem ...) val) #'((root-elem sub-elem ...) val)))) (flatten-elems #'(sub-elems ...)))) (((elem ...) val) #'(((elem ...) val))) ((elem val) #'(((elem) val))))) stx)) (syntax-case stx () ((_ bv i elem ...) (non-opaque-bstruct-descriptor-identifier? #') (with-syntax (((((elem ...) val) ...) (flatten-elems #'(elem ...)))) #'(begin (( setter elem ...) bv i val) ...)))))) (define-syntax bstruct-unpack (lambda (stx) (syntax-case stx () ((_ bv i elem ...) (non-opaque-bstruct-descriptor-identifier? #') #`(values #,@(map (lambda (elem) (syntax-case elem () ((e ...) #'(( getter e ...) bv i)) (e #'(( getter e) bv i)))) #'(elem ...))))))) ;; Allocate a fresh bstruct that wraps a fresh bytevector big ;; enough to store the entire structure. (define-syntax bstruct-alloc (syntax-rules () ((_ ( n) (i elem ...) ...) (let* ((size (bstruct-sizeof )) (len (* size n)) (bv (make-bytevector len 0))) (bstruct-pack! bv (* size i) elem ...) ... (bstruct-wrap bv 0))) ((_ elem ...) (bstruct-alloc ( 1) (0 elem ...))))) ;; Return the value of some elements. (define-syntax bstruct-ref (syntax-rules () ((_ ( i) bs elem ...) ; array (match bs (($ bv (? u64? offset) (? u64? n)) (assert (u64? i) 'bstruct-ref) (check-size i n 'bstruct-ref) (let ((offset (+ offset (* (bstruct-sizeof ) i)))) (bstruct-unpack bv offset elem ...))))) ((_ bs elem ...) (bstruct-ref ( 0) bs elem ...)))) ;; Set the value of some elements. (define-syntax bstruct-set! (syntax-rules () ((_ ( i) bs elem ...) ; array (match bs (($ bv (? u64? offset) (? u64? n)) (assert (u64? i) 'bstruct-set!) (check-size i n 'bstruct-set!) (let ((offset (+ offset (* (bstruct-sizeof ) i)))) (bstruct-pack! bv offset elem ...))))) ((_ bs elem ...) (bstruct-set! ( 0) bs elem ...)))) ;; Imperative/functional struct copying. (define-syntax-rule (bstruct-copy! src dst) (match src (($ src-bv (? u64? src-offset) (? u64? src-n)) (match dst (($ dst-bv (? u64? dst-offset) (? u64? dst-n)) (check-size src-n (1+ dst-n) 'bstruct-copy!) (bytevector-copy! src-bv src-offset dst-bv dst-offset (* (bstruct-sizeof ) src-n))))))) (define-syntax-rule (bstruct-copy src) (match src (($ _ _ (? u64? n)) (let ((dst (bstruct-alloc ( n)))) (bstruct-copy! src dst) dst)))) (define (identifier-eq? stx sym) (and (identifier? stx) (eq? (syntax->datum stx) sym))) ;; The big gnarly procedural macro! Buckle up! (define-syntax define-bstruct (lambda (stx) (define primitives '(u8 s8 u16 s16 u32 s32 u64 s64 f32 f64 int uint long ulong short ushort size_t ssize_t ptrdiff_t intptr_t uintptr_t)) (define (target-endianness? e) (eq? e (target-endianness))) (define (resolve-endianness e) (match e ('native (target-endianness)) ('non-native (non-target-endianness)) (_ e))) (define (identifier-memq? stx syms) (and (identifier? stx) (memq (syntax->datum stx) syms))) ;; Primitive getter/setter helpers (define (ref/endianness proc endianness) #`(lambda (bv i) (#,proc bv i #,endianness))) (define (set!/endianness proc endianness) #`(lambda (bv i x) (#,proc bv i x #,endianness))) (define (uint-ref size endianness) #`(lambda (bv i) (bytevector-uint-ref bv i '#,(datum->syntax #f endianness) #,size))) (define (uint-set! size endianness) #`(lambda (bv i x) (bytevector-uint-set! bv i x '#,(datum->syntax #f endianness) #,size))) (define (sint-ref size endianness) #`(lambda (bv i) (bytevector-sint-ref bv i '#,(datum->syntax #f endianness) #,size))) (define (sint-set! size endianness) #`(lambda (bv i x) (bytevector-sint-set! bv i x '#,(datum->syntax #f endianness) #,size))) ;; Scalar types are divided into two categories: machine indepenent ;; and machine dependent. The machine independent types (i32, f32, ;; etc.) have a known size and alignment on all platforms. The ;; machine dependent types have a size and alignment that can vary ;; depending on the ABI of the system that is compiling the code. (define (abi-ref type e) ; e for endianness (match type ('uint (uint-ref (sizeof* 'uint) e)) ('int (sint-ref (sizeof* 'int) e)) ('ulong (uint-ref (sizeof* 'ulong) e)) ('long (sint-ref (sizeof* 'long) e)) ('ushort (uint-ref (sizeof* 'ushort) e)) ('short (sint-ref (sizeof* 'short) e)) ('size_t (uint-ref (sizeof* 'size_t) e)) ('ssize_t (sint-ref (sizeof* 'ssize_t) e)) ('ptrdiff_t (sint-ref (sizeof* 'ptrdiff_t) e)) ('intptr_t (sint-ref (sizeof* 'intptr_t) e)) ('uintptr_t (uint-ref (sizeof* 'uintptr_t) e)))) (define (abi-set! type e) (match type ('uint (uint-set! (sizeof* 'uint) e)) ('int (sint-set! (sizeof* 'int) e)) ('ulong (uint-set! (sizeof* 'ulong) e)) ('long (sint-set! (sizeof* 'long) e)) ('ushort (uint-set! (sizeof* 'ushort) e)) ('short (sint-set! (sizeof* 'short) e)) ('size_t (uint-set! (sizeof* 'size_t) e)) ('ssize_t (sint-set! (sizeof* 'ssize_t) e)) ('ptrdiff_t (sint-set! (sizeof* 'ptrdiff_t) e)) ('intptr_t (sint-set! (sizeof* 'intptr_t) e)) ('uintptr_t (uint-set! (sizeof* 'uintptr_t) e)))) (define (primitive-getter size native? type) (if native? (match type ('u8 #'bytevector-u8-ref) ('s8 #'bytevector-s8-ref) ('u16 #'bytevector-u16-native-ref) ('s16 #'bytevector-s16-native-ref) ('u32 #'bytevector-u32-native-ref) ('s32 #'bytevector-s32-native-ref) ('u64 #'bytevector-u64-native-ref) ('s64 #'bytevector-s64-native-ref) ('f32 #'bytevector-ieee-single-native-ref) ('f64 #'bytevector-ieee-double-native-ref) (_ (abi-ref type (target-endianness)))) (let ((e (non-target-endianness))) (match type ('u8 #'bytevector-u8-ref) ('s8 #'bytevector-s8-ref) ('u16 (ref/endianness #'bytevector-u16-ref e)) ('s16 (ref/endianness #'bytevector-s16-ref e)) ('u32 (ref/endianness #'bytevector-u32-ref e)) ('s32 (ref/endianness #'bytevector-s32-ref e)) ('u64 (ref/endianness #'bytevector-u64-ref e)) ('s64 (ref/endianness #'bytevector-s64-ref e)) ('f32 (ref/endianness #'bytevector-ieee-single-ref e)) ('f64 (ref/endianness #'bytevector-ieee-double-ref e)) (_ (abi-ref type e)))))) (define (primitive-setter size native? type) (if native? (match type ('u8 #'bytevector-u8-set!) ('s8 #'bytevector-s8-set!) ('u16 #'bytevector-u16-native-set!) ('s16 #'bytevector-s16-native-set!) ('u32 #'bytevector-u32-native-set!) ('s32 #'bytevector-s32-native-set!) ('u64 #'bytevector-u64-native-set!) ('s64 #'bytevector-s64-native-set!) ('f32 #'bytevector-ieee-single-native-set!) ('f64 #'bytevector-ieee-double-native-set!) (_ (abi-set! type (target-endianness)))) (let ((e (non-target-endianness))) (match type ('u8 #'bytevector-u8-set!) ('s8 #'bytevector-s8-set!) ('u16 (set!/endianness #'bytevector-u16-set! e)) ('s16 (set!/endianness #'bytevector-s16-set! e)) ('u32 (set!/endianness #'bytevector-u32-set! e)) ('s32 (set!/endianness #'bytevector-s32-set! e)) ('u64 (set!/endianness #'bytevector-u64-set! e)) ('s64 (set!/endianness #'bytevector-s64-set! e)) ('f32 (set!/endianness #'bytevector-ieee-single-set! e)) ('f64 (set!/endianness #'bytevector-ieee-double-set! e)) (_ (abi-set! type e)))))) ;; Types can be recursive by referencing a type name within the ;; same type group in a pointer expression. ;; ;; For example: ;; ;; (define-bstruct linked-list ;; (struct (item int) (next (* linked-list)))) ;; ;; To make this work, we keep a side table mapping type names to ;; pointer type accessor forms that need to be patched with a ;; cyclical reference *after* all the types are defined. (define recursive-pointers (make-hash-table)) (define (sizeof type) (match type ((or ('scalar size _ _ _) ('struct size _ _) ('union size _ _) ('array size _ _ _) ('pointer size _ _)) size) (('opaque) 0) ((? bstruct-descriptor-identifier?) (bstruct-descriptor-identifier-size type)))) (define (alignof type) (match type ((or ('scalar _ align _ _) ('struct _ align _) ('union _ align _) ('array _ align _ _) ('pointer _ align _)) align) (('opaque) 0) ((? bstruct-descriptor-identifier?) (bstruct-descriptor-identifier-alignment type)))) (define (compute-type expr accessor type-ids packed? endianness) (syntax-case expr () ;; Modifiers ((packed expr) (identifier-eq? #'packed 'packed) (compute-type #'expr accessor type-ids #t endianness)) ((unpacked expr) (identifier-eq? #'unpacked 'unpacked) (compute-type #'expr accessor type-ids #f endianness)) ((endian e expr) (and (identifier-eq? #'endian 'endian) (identifier-memq? #'e '(native non-native big little))) (compute-type #'expr accessor type-ids packed? (resolve-endianness (syntax->datum #'e)))) ;; Previously defined types. 'packed?' and 'endianness' do ;; not apply here. (type-id (bstruct-descriptor-identifier? #'type-id) #'type-id) ;; Primitive scalars (primitive (identifier-memq? #'primitive primitives) (let ((type (syntax->datum #'primitive))) `(scalar ,(sizeof* type) ,(alignof* type) ,(target-endianness? endianness) ,type))) ;; Structs ((struct (field-name field-expr) ...) (and (identifier-eq? #'struct 'struct) (every identifier? #'(field-name ...))) (let loop ((field-exprs #'((field-name field-expr) ...)) (fields '()) (offset 0) (alignment 0)) (syntax-case field-exprs () (() ;; Round up to a multiple of alignment to get final ;; size. (let ((size (* (ceiling (/ offset alignment)) alignment))) `(struct ,size ,alignment ,(reverse fields)))) ;; An underscore indicates a pseudo-field that is just ;; for padding. It is not included in the struct field ;; list and just adds to the offset. (((underscore expr) . rest-exprs) (identifier-eq? #'underscore '_) (let ((type (compute-type #'expr #f type-ids packed? endianness))) (loop #'rest-exprs fields (+ offset (sizeof type)) alignment))) (((name expr) . rest-exprs) (let* ((type (compute-type #'expr #`(struct-field-ref #,accessor 'name) type-ids packed? endianness)) (field-alignment (alignof type)) (padding (if packed? 0 (modulo (- field-alignment (modulo offset field-alignment)) field-alignment))) (offset (+ offset padding)) (alignment (max alignment field-alignment))) (loop #'rest-exprs (cons (list (syntax->datum #'name) offset type) fields) (+ offset (sizeof type)) alignment)))))) ;; Unions ((union (field-name field-expr) ...) (and (identifier-eq? #'union 'union) (every identifier? #'(field-name ...))) (let loop ((field-exprs #'((field-name field-expr) ...)) (fields '()) (size 0) (alignment 0)) (syntax-case field-exprs () (() `(union ,size ,alignment ,(reverse fields))) (((underscore expr) . rest-exprs) (identifier-eq? #'underscore '_) (let ((type (compute-type #'expr #f type-ids packed? endianness))) (loop #'rest-exprs fields (max size (sizeof type)) alignment))) (((name expr) . rest-exprs) (let ((type (compute-type #'expr #`(struct-field-ref #,accessor 'name) type-ids packed? endianness))) (loop #'rest-exprs (cons (list (syntax->datum #'name) type) fields) (max size (sizeof type)) (max alignment (alignof type)))))))) ;; Arrays ((array length expr) (and (identifier-eq? #'array 'array) (exact-integer? (syntax->datum #'length))) (let ((length (syntax->datum #'length)) (type (compute-type #'expr #`(array-type #,accessor) type-ids packed? endianness))) `(array ,(* (sizeof type) length) ,(alignof type) ,length ,type))) ;; Pointers ((pointer expr) (identifier-eq? #'pointer '*) (let ((size (ffi:sizeof '*)) (align (ffi:alignof '*))) (let loop ((expr #'expr)) (syntax-case expr () ;; Void pointer (void (identifier-eq? #'void 'void) `(pointer ,size ,align void)) ;; Primitive pointer (prim (identifier-memq? #'prim primitives) `(pointer ,size ,align ,(compute-type #'prim #f type-ids packed? endianness))) ;; Pointer to a pointer ((pointer expr) (identifier-eq? #'pointer '*) `(pointer ,size ,align ,(loop #'expr))) ;; Recursive reference to a type within this type group. (type-id (any (lambda (id) (bound-identifier=? #'type-id id)) type-ids) (let ((pointer `(pointer ,size ,align (recur ,#'type-id)))) ;; Make a note that the recursive reference needs to be ;; made via mutation after all types in the group are ;; defined. (hashq-set! recursive-pointers (syntax->datum #'type-id) (cons accessor (hashq-ref recursive-pointers (syntax->datum #'type-id) '()))) pointer)) ;; Reference to a type outside of this type group. (type-id (bstruct-descriptor-identifier? #'type-id) `(pointer ,size ,align ,#'type-id)))))) ;; Opaque types (opaque (identifier-eq? #'opaque 'opaque) '(opaque)))) (define (type->syntax type) (match type ((? identifier? type) type) (('scalar size alignment native? type) #`(make-scalar #,size #,alignment #,native? '#,(datum->syntax #f type))) (('struct size alignment fields) (let ((fields* (map (match-lambda ((name offset type) #`(make-struct-field '#,(datum->syntax #f name) #,offset #,(type->syntax type)))) fields))) #`(make-struct #,size #,alignment (list #,@fields*)))) (('union size alignment fields) (let ((fields* (map (match-lambda ((name type) #`(make-union-field '#,(datum->syntax #f name) #,(type->syntax type)))) fields))) #`(make-union #,size #,alignment (list #,@fields*)))) (('array size alignment length type) #`(make-array #,size #,alignment #,length #,(type->syntax type))) (('pointer size alignment ('recur type)) #`(make-pointer #,size #,alignment #f)) (('pointer size alignment 'void) #`(make-pointer #,size #,alignment #f)) (('pointer size alignment type) #`(make-pointer #,size #,alignment #,(type->syntax type))) (('opaque) #'(make-opaque)))) (define (expand-accessor proc stx id op type group) #`(syntax-case #,stx () (() #'#,(proc id 0)) ; self reference ((elem :::) #,(let loop ((stx #'#'(elem :::)) (type type) (offset 0)) (match type ((? bstruct-descriptor-identifier? type) ;; Recursively invoke macro for referenced type to produce ;; the accessor. #`(syntax-case #,stx () ((elem :::) #'#,(proc #`(#,type #,(datum->syntax #f op) elem :::) offset)))) (('scalar _ _ _ _) #`(syntax-case #,stx () (() #'#,(proc type offset)))) (('struct _ _ fields) #`(syntax-case #,stx () ((e elem :::) (match (syntax->datum #'e) #,@(map (match-lambda ((name offset* type) #`('#,(datum->syntax #f name) #,(loop #'#'(elem :::) type #`(+ #,offset #,offset*))))) fields) (_ #'(error "no such struct field" 'e)))))) (('union _ _ fields) #`(syntax-case #,stx () ((e elem :::) (match (syntax->datum #'e) #,@(map (match-lambda ((name type) #`('#,(datum->syntax #f name) #,(loop #'#'(elem :::) type offset)))) fields) (_ #'(error "no such union field" 'e)))))) (('array _ _ length type) ;; Need to generate a unique name here to capture ;; the 'e' containing the array index expression as ;; 'e' could get shadowed later. (with-syntax (((i) (generate-temporaries '(i)))) #`(syntax-case #,stx () ((e elem :::) (with-syntax ((i #'e)) #,(loop #'#'(elem :::) type #`(+ #,offset (* (let () ;; if 'i' is a constant then ;; these checks will be elided ;; by the compiler. (assert (u64? i) 'bstruct-accessor) (assert (< -1 i #,length) 'bstruct-accessor) i) #,(sizeof type))))))))) ;; Void pointers can only be referenced, not ;; dereferenced. (('pointer _ _ 'void) #`(syntax-case #,stx () (() #'#,(proc type offset)))) (('pointer _ _ type*) (call-with-values (lambda () (match type* (('recur type*) (values type* #t)) (type* (values type* #f)))) (lambda (type* recur?) #`(let () (define (expand-pointer-accessor stx) (syntax-case stx () ;; Pointer reference (() #'#,(proc type offset)) ;; Pointer dereference. Sigh, it's ;; complicated... (((* index) elem :::) (identifier-eq? #'* '*) (let ((offset #,offset) (e '#,(datum->syntax #f (target-endianness))) (ptr-size #,(target-word-size)) ;; For recursive types, we don't ;; yet have a defined macro to ;; query for the size, so we have ;; to look it up in the group ;; alist. (size #,(sizeof (if recur? (assoc-ref group type*) type*))) (body #,(match type* ((? syntax?) #`#'#,(proc #`(#,type* #,(datum->syntax #f op) elem :::) 0)) (_ (loop #'#'(elem :::) type* 0))))) ;; 'bv' and 'i' are the lexical ;; variables containing the ;; bytevector and offset for ;; getting/setting. Every time we ;; encounter a pointer dereference we ;; need to shadow the old variables ;; with new ones. #`(let* ((e '#,(datum->syntax #f e)) (base (bytevector-sint-ref bv (+ i #,offset) e #,ptr-size)) (address (+ base (* #,size index))) (ptr (ffi:make-pointer address)) (bv (ffi:pointer->bytevector ptr #,size)) (i 0)) #,body))) ;; Pointer dereference with implied ;; index of 0. ((* elem :::) (identifier-eq? #'* '*) (expand-pointer-accessor #'((* 0) elem :::))))) (expand-pointer-accessor #,stx))))) (('opaque) #'(error "cannot access opaque type"))))))) (define (macro-for-type id type-id type group) (define (self-identifier? other) (eq? id other)) (let ((self-size (sizeof type))) #`(define-syntax #,id (with-ellipsis ::: (lambda (stx) ;; Since syntax transformers are procedures, we can ;; stash useful information in procedure properties that ;; 'define-bstruct' can use if/when a bstruct type ;; is referenced within another type definition. #((bstruct? . #t) (bstruct-opaque? . #,(eq? type '(opaque))) (bstruct-size . #,(sizeof type)) (bstruct-alignment . #,(alignof type))) (syntax-case stx () ;; Identifier syntax to provide the illusion that this ;; macro is just an ordinary variable. (self (identifier? #'self) #'#,type-id) ;; Private interface for code generation. ((_ offset elem :::) (identifier-eq? #'offset 'offset) #`(lambda (bv i) #,#,(expand-accessor (lambda (type offset) (match type ((and sub-offset ((? syntax?) ...)) #`(#,sub-offset bv (+ i #,offset))) (_ #`(values bv #,offset)))) #'#'(elem :::) id 'offset type group))) ((_ getter elem :::) (identifier-eq? #'getter 'getter) #`(lambda (bv i) #,#,(expand-accessor (lambda (type offset) (match type ((or (? self-identifier?) (? bstruct-descriptor-identifier?)) #`(make-bstruct #,type bv (+ i #,offset))) ((and sub-get ((? syntax?) ...)) #`(#,sub-get bv (+ i #,offset))) (('pointer _ _ 'void) #`(ffi:make-pointer (bytevector-sint-ref bv (+ i #,offset) '#,(datum->syntax #f (target-endianness)) #,(target-word-size)))) (('pointer size _ (or ('recur type) type)) #`(ffi:make-pointer (bytevector-sint-ref bv (+ i #,offset) '#,(datum->syntax #f (target-endianness)) #,(target-word-size)))) (('scalar size align native? type) #`(#,(primitive-getter size native? type) bv (+ i #,offset))))) #'#'(elem :::) id 'getter type group))) ((_ setter elem :::) (identifier-eq? #'setter 'setter) #`(lambda (bv i x) #,#,(expand-accessor (lambda (type offset) (match type ((? self-identifier?) #`(match x (($ #,type src j) (bytevector-copy! src j bv (+ i #,offset) #,self-size)))) ((? bstruct-descriptor-identifier?) #`(match x (($ #,type src j) (bytevector-copy! src j bv (+ i #,offset) #,(sizeof type))))) ((and sub-set! ((? syntax?) ...)) #`(#,sub-set! bv (+ i #,offset) x)) (('pointer size _ 'void) #`(bytevector-sint-set! bv (+ i #,offset) (ffi:pointer-address x) '#(datum->syntax #f (target-endianness)) #,(target-word-size))) (('pointer size _ (or ('recur type) type)) #`(bytevector-sint-set! bv (+ i #,offset) (ffi:pointer-address x) '#(datum->syntax #f (target-endianness)) #,(target-word-size))) (('scalar size align native? type) (let ((setter (primitive-setter size native? type))) #`(#,setter bv (+ i #,offset) x))))) #'#'(elem :::) id 'setter type group))))))))) (syntax-case stx () ;; Type group definition. Types defined in the same group can ;; contain recursive pointer references to each other. ((_ (id expr . kwargs) ...) (not (null? #'(id ...))) (with-syntax (((type-id ...) (map (lambda (stx) ;; This ID is non-hygienic and also *not* ;; gensym'd so it's the same name every ;; time for predictable compilation, ;; especially in the presence of 'make ;; -j' parallelism. ;; ;; Is there a better way to do this? ;; Probably. (let ((type-id (symbol-append '%%bstruct-record-type- (syntax->datum stx)))) (datum->syntax #f type-id))) #'(id ...)))) (let* ((ids #'(id ...)) (type-ids #'(type-id ...)) (kwargs #'(kwargs ...)) (types (map (lambda (id expr) (compute-type expr #`(bstruct-descriptor-type #,id) ids #f (target-endianness))) #'(id ...) #'(expr ...))) (group (map cons #'(id ...) types))) #`(begin ;; First, define the descriptors using gensym'd names. #,@(map (lambda (id temp-id kwargs type) #`(define #,temp-id (make-bstruct-descriptor '#,id #,(type->syntax type) #,@kwargs))) ids type-ids kwargs types) ;; Then tie the knot for recursive pointer types. #,@(append-map (lambda (id) (map (lambda (accessor) #`(set-pointer-type! #,accessor #,id)) (hashq-ref recursive-pointers (syntax->datum id) '()))) ids) ;; Finally, define macros using the true names that ;; wrap the gensym'd variables. #,@(map (lambda (id type-id type) (macro-for-type id type-id type group)) ids type-ids types))))) ;; A single type definition is a type group of one. ((_ id expr . kwargs) #'(define-bstruct (id expr . kwargs))))))