;;; guile-bstruct -- Binary structures for Guile ;;; Copyright © 2024 David Thompson ;;; ;;; Licensed under the Apache License, Version 2.0 (the "License"); ;;; you may not use this file except in compliance with the License. ;;; You may obtain a copy of the License at ;;; ;;; http://www.apache.org/licenses/LICENSE-2.0 ;;; ;;; Unless required by applicable law or agreed to in writing, software ;;; distributed under the License is distributed on an "AS IS" BASIS, ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ;;; See the License for the specific language governing permissions and ;;; limitations under the License. ;;; Commentary: ;; ;; A bstruct is a data type that encapsulates a bytevector and an ;; offset which interprets that bytevector based on a given layout. ;; Bstructs are useful for manipulating C structs when using the FFI, ;; or interpreting/manipulating packed binary data such as GPU vertex ;; buffers, or for data types that benefit from unboxed math ;; optimizations such as vectors and matrices. ;; ;; Inspired by guile-opengl's define-packed-struct and based on ;; "Ftypes: Structured foreign types" by Andy Keep and R. Kent Dybvig. ;; ;; http://scheme2011.ucombinator.org/papers/Keep2011.pdf ;; ;; Features: ;; ;; - Efficiency through macro magic. Procedural macros generate tasty ;; code for the compiler to munch on and emit efficient bytecode. ;; Runtime checks are reduced to the bare minimum. ;; ;; - Raw bytevector access mode. Easily read/write structured data ;; from/to raw bytevectors without going through an intermediary ;; struct. Very useful for batch processing in situations when the ;; overhead of creating wrapper structs would hinder throughput. ;; ;; - Cache friendly. In performance sensitive code, many bstructs can ;; be stored in contiguous memory by pre-allocating a large bytevector ;; for the underlying storage. Individual bstruct handles simply ;; point at different offsets. ;; ;;; Code: (define-module (bstruct) #:use-module (ice-9 exceptions) #:use-module (ice-9 match) #:use-module (rnrs bytevectors) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (srfi srfi-11) #:use-module (system base target) #:use-module ((system foreign) #:prefix ffi:) #:use-module (system syntax) #:export (bstruct? bstruct-=? bstruct-type bstruct-length bstruct->sexp bstruct->pointer pointer->bstruct bstruct-wrap bstruct-unwrap bstruct-alloc bstruct-sizeof bstruct-alignof bstruct-ref bstruct-set! bstruct-unpack bstruct-pack! bstruct-copy bstruct-copy! define-bstruct)) (define-record-type (make-scalar size align native? type) scalar? (size scalar-size) (align scalar-align) (native? scalar-native?) ; native endianness? (type scalar-type)) (define-record-type (make-bit-field name offset length signed?) bit-field? (name bit-field-name) (offset bit-field-offset) (length bit-field-length) (signed? bit-field-signed?)) (define-record-type (make-bits size align native? fields) bits? (size bits-size) (align bits-align) (native? bits-native?) (fields bits-fields)) (define-record-type (make-struct-field name offset type) struct-field? (name struct-field-name) (offset struct-field-offset) (type struct-field-type)) (define-record-type (make-struct size align fields) %struct? (size struct-size) (align struct-align) (fields struct-fields)) (define-record-type (make-union-field name type) union-field? (name union-field-name) (type union-field-type)) (define-record-type (make-union size align fields) union? (size union-size) (align union-align) (fields union-fields)) (define-record-type (make-array size align length type) array? (size array-size) (align array-align) (length array-length) (type array-type)) (define-record-type (make-pointer size align type) pointer? (size pointer-size) (align pointer-align) ;; Mutable for setting up recursive types. (type pointer-type set-pointer-type!)) (define (struct-field-ref struct name) (and=> (find (match-lambda (($ name*) (eq? name name*))) (struct-fields struct)) struct-field-type)) (define (union-field-ref struct name) (and=> (find (match-lambda (($ name*) (eq? name name*))) (union-fields struct)) union-field-type)) (define (bstruct-type? obj) (or (scalar? obj) (bits? obj) (%struct? obj) (union? obj) (array? obj) (pointer? obj))) (define (sizeof type) (match type ((or ($ size) ($ size) ($ size) ($ size) ($ size) ($ size)) size))) (define (alignof type) (match type ((or ($ _ align) ($ _ align) ($ _ align) ($ _ align) ($ _ align) ($ _ align)) align))) (define-syntax-rule (assert expr who) (unless expr (raise-exception (make-exception (make-assertion-failure) (make-exception-with-origin who) (make-exception-with-irritants '(expr)))))) (define-syntax-rule (check-size i n who) (assert (and (>= i 0) (< i n)) who)) (define-inlinable (u64? x) (and (exact-integer? x) (<= 0 x (ash 1 64)))) ;; Bstructs form a shallow vtable hierarchy. (define (make-vtable (string-append standard-vtable-fields "pwpw") (lambda (desc port) (format port "#" (bstruct-descriptor-name desc))))) (define (bstruct-descriptor-name descriptor) (struct-ref descriptor vtable-offset-user)) (define (bstruct-descriptor-type descriptor) (struct-ref descriptor (+ vtable-offset-user 1))) (define (bstruct-descriptor-sizeof descriptor) (sizeof (bstruct-descriptor-type descriptor))) (define (print-bstruct bs port) (format port "#<~a ~a>" (bstruct-descriptor-name (struct-vtable bs)) (bstruct->sexp bs))) (define* (make-bstruct-descriptor name type #:key (printer print-bstruct)) (assert (bstruct-type? type) 'make-bstruct-descriptor) (make-struct/no-tail (make-struct-layout "pwpwpw") printer name type)) (define (bstruct-descriptor? obj) (and (struct? obj) (eq? (struct-vtable obj) ))) (define (%bstruct? obj) (and (struct? obj) (bstruct-descriptor? (struct-vtable obj)))) (define (bstruct-bytevector bs) (assert (%bstruct? bs) 'bstruct-bytevector) (struct-ref bs 0)) (define (bstruct-offset bs) (assert (%bstruct? bs) 'bstruct-bytevector) (struct-ref bs 1)) (define (bstruct-length bs) (assert (%bstruct? bs) 'bstruct-bytevector) (struct-ref bs 2)) (define (bstruct-type bs) (assert (%bstruct? bs) 'bstruct-bytevector) (bstruct-descriptor-type (struct-vtable bs))) ;; Bstructs are composed of a type descriptor, a bytevector that ;; provides storage, an offset pointing to the start of the struct ;; data within that bytevector, and the number of contiguous structs ;; within. (define (%make-bstruct descriptor bv offset n) (make-struct/no-tail descriptor bv offset n)) (define (make-bstruct descriptor bv offset n) (assert (bstruct-descriptor? descriptor) 'make-bstruct) (assert (bytevector? bv) 'make-bstruct) (assert (exact-integer? offset) 'make-bstruct) (assert (>= offset 0) 'make-bstruct) (assert (<= (+ offset (* (bstruct-descriptor-sizeof descriptor) n)) (bytevector-length bv)) 'make-bstruct) (%make-bstruct descriptor bv offset n)) ;; Platform ABI details (define (non-target-endianness) (if (eq? (target-endianness) (endianness little)) (endianness big) (endianness little))) (define (sizeof* type) (match type ((or 'u8 's8) 1) ((or 'u16 's16) 2) ((or 'u32 's32 'f32) 4) ((or 'u64 's64 'f64) 8) (_ (if (string=? %host-type (target-type)) (match type ('uint (ffi:sizeof ffi:unsigned-int)) ('int (ffi:sizeof ffi:int)) ('ulong (ffi:sizeof ffi:unsigned-long)) ('long (ffi:sizeof ffi:long)) ('ushort (ffi:sizeof ffi:unsigned-short)) ('short (ffi:sizeof ffi:short)) ('size_t (ffi:sizeof ffi:size_t)) ('ssize_t (ffi:sizeof ffi:ssize_t)) ('ptrdiff_t (ffi:sizeof ffi:ptrdiff_t)) ('intptr_t (ffi:sizeof ffi:intptr_t)) ('uintptr_t (ffi:sizeof ffi:uintptr_t))) ;; FIXME: Fill in with proper ABI details. We will lazily ;; evaluate this work when we hit a problem in a cross build. (match type ('uint 4) ('int 4) ('ulong 8) ('long 8) ('ushort 2) ('short 2) ('size_t (target-word-size)) ('ssize_t (target-word-size)) ('ptrdiff_t (target-word-size)) ('intptr_t (target-word-size)) ('uintptr_t (target-word-size))))))) (define (alignof* type) (match type ((or 'u8 's8) 1) ((or 'u16 's16) 2) ((or 'u32 's32 'f32) 4) ((or 'u64 's64 'f64) 8) (_ (if (string=? %host-type (target-type)) (match type ('uint (ffi:sizeof ffi:unsigned-int)) ('int (ffi:sizeof ffi:int)) ('ulong (ffi:sizeof ffi:unsigned-long)) ('long (ffi:sizeof ffi:long)) ('ushort (ffi:sizeof ffi:unsigned-short)) ('short (ffi:sizeof ffi:short)) ('size_t (ffi:sizeof ffi:size_t)) ('ssize_t (ffi:sizeof ffi:ssize_t)) ('ptrdiff_t (ffi:sizeof ffi:ptrdiff_t)) ('intptr_t (ffi:sizeof ffi:intptr_t)) ('uintptr_t (ffi:sizeof ffi:uintptr_t)) ('* (ffi:sizeof '*))) (match type ('uint 4) ('int 4) ('ulong 8) ('long 8) ('ushort 2) ('short 2) ('size_t (target-word-size)) ('ssize_t (target-word-size)) ('ptrdiff_t (target-word-size)) ('intptr_t (target-word-size)) ('uintptr_t (target-word-size)) ('* (target-word-size))))))) ;; It is useful to see bstructs in s-expression form when working ;; at the REPL. (define (bstruct->sexp bs) (let ((bv (bstruct-bytevector bs))) (let loop ((type (bstruct-type bs)) (offset (bstruct-offset bs))) (match type ((? bstruct-descriptor? desc) (loop (bstruct-descriptor-type desc) offset)) (($ _ _ native? type) (let ((e (if native? (target-endianness) (non-target-endianness)))) (match type ('u8 (bytevector-u8-ref bv offset)) ('s8 (bytevector-s8-ref bv offset)) ('u16 (bytevector-u16-ref bv offset e)) ('s16 (bytevector-s16-ref bv offset e)) ('u32 (bytevector-u32-ref bv offset e)) ('s32 (bytevector-s32-ref bv offset e)) ('u64 (bytevector-u64-ref bv offset e)) ('s64 (bytevector-s64-ref bv offset e)) ('f32 (bytevector-ieee-single-ref bv offset e)) ('f64 (bytevector-ieee-double-ref bv offset e)) ('uint (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-int))) ('int (bytevector-sint-ref bv offset e (ffi:sizeof ffi:int))) ('ulong (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-long))) ('long (bytevector-sint-ref bv offset e (ffi:sizeof ffi:long))) ('ushort (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-short))) ('short (bytevector-sint-ref bv offset e (ffi:sizeof ffi:short))) ('size_t (bytevector-uint-ref bv offset e (ffi:sizeof ffi:size_t))) ('ssize_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:ssize_t))) ('ptrdiff_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:ptrdiff_t))) ('intptr_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:intptr_t))) ('uintptr_t (bytevector-uint-ref bv offset e (ffi:sizeof ffi:uintptr_t)))))) (($ size _ native? fields) (let ((e (if native? (target-endianness) (non-target-endianness)))) `(bits ,@(map (match-lambda (($ name start len signed?) (list name (let ((x (ash (logand (bytevector-uint-ref bv offset e size) (ash (1- (ash 1 len)) start)) (- start)))) (if signed? (centered-remainder x (ash 1 len)) x))))) fields)))) (($ _ _ fields) `(struct ,@(map (match-lambda (($ name offset* type) (list name (loop type (+ offset offset*))))) fields))) (($ _ _ fields) `(union ,@(map (match-lambda (($ name type) (list name (loop type offset)))) fields))) (($ _ _ length type) (let ((size (sizeof type))) `(array ,@(map (lambda (i) (loop type (+ offset (* i size)))) (iota length))))) (($ ) `(* ,(bytevector-uint-ref bv offset (native-endianness) (ffi:sizeof '*)))))))) ;; Macro helpers that use metadata attached to bstruct syntax ;; transformers. (define (bstruct-descriptor-identifier? id) (and (identifier? id) (let-values (((kind val) (syntax-local-binding id))) (and (eq? kind 'macro) (procedure-property val 'bstruct?))))) (define (opaque-bstruct-descriptor-identifier? id) (and (identifier? id) (let-values (((kind val) (syntax-local-binding id))) (and (eq? kind 'macro) (procedure-property val 'bstruct?) (procedure-property val 'bstruct-opaque?))))) (define (non-opaque-bstruct-descriptor-identifier? id) (not (opaque-bstruct-descriptor-identifier? id))) (define (bstruct-descriptor-identifier-size id) (let-values (((_ transformer) (syntax-local-binding id))) (procedure-property transformer 'bstruct-size))) (define (bstruct-descriptor-identifier-align id) (let-values (((_ transformer) (syntax-local-binding id))) (procedure-property transformer 'bstruct-align))) (define (identifier-eq? stx sym) (and (identifier? stx) (eq? (syntax->datum stx) sym))) (define-syntax-rule (symbolic-match? id) (identifier-eq? #'id 'id)) (define (identifier-memq? stx syms) (and (identifier? stx) (memq (syntax->datum stx) syms))) ;; A predicate that can answer the questions: ;; 1) Is this *any kind* of bstruct? ;; 2) Is this *a specific kind* of bstruct? (define-syntax bstruct? (lambda (stx) (syntax-case stx () (x (identifier? #'x) #'(case-lambda ((obj) (%bstruct? obj)) (( obj) (match obj (($ ) #t) (_ #f))))) ((_ obj) #'(%bstruct? obj)) ((_ obj) (bstruct-descriptor-identifier? #') #'(match obj (($ ) #t) (_ #f)))))) (define-syntax bstruct-=? (lambda (stx) (syntax-case stx () ((_ a b) (bstruct-descriptor-identifier? #') #'(match a (($ bv-a offset-a) (match b (($ bv-b offset-b) (let ((n (bstruct-sizeof ))) (let loop ((i 0)) (cond ((= i n) #t) ((= (bytevector-u8-ref bv-a (+ offset-a i)) (bytevector-u8-ref bv-b (+ offset-b i))) (loop (+ i 1))) (else #f)))))))))))) ;; Create a pointer to the bstruct or some element within the bstruct. (define-syntax bstruct->pointer (lambda (stx) (syntax-case stx () ((_ ( i) bs) (bstruct-descriptor-identifier? #') #'(match bs (($ bv (? u64? offset)) (ffi:bytevector->pointer bv offset)))) ((_ ( i) bs (elem ...)) (bstruct-descriptor-identifier? #') #'(match bs (($ bv (? u64? offset) (? u64? n)) (check-size i n 'bstruct->pointer) (let ((offset (+ offset (* (bstruct-sizeof ) i)))) (call-with-values (lambda () (( offset elem ...) bv offset)) ffi:bytevector->pointer))))) ((_ ( i) bs elem) #'(bstruct->pointer ( i) bs (elem))) ((_ bs) #'(bstruct->pointer ( 0) bs)) ((_ bs (elem ...)) #'(bstruct->pointer ( 0) bs (elem ...))) ((_ bs elem) #'(bstruct->pointer ( 0) bs (elem)))))) (define-syntax pointer->bstruct (lambda (stx) (syntax-case stx () ((_ ptr n) (non-opaque-bstruct-descriptor-identifier? #') #'(let ((size (* (bstruct-sizeof ) n))) (make-bstruct (ffi:pointer->bytevector ptr size) 0 n))) ((_ ptr) #'(pointer->bstruct ptr 1))))) ;; Wrap a bytevector in a bstruct. (define-syntax bstruct-wrap (lambda (stx) (syntax-case stx () ((_ bv offset n) (non-opaque-bstruct-descriptor-identifier? #') #'(make-bstruct bv offset n)) ((_ bv offset) #'(bstruct-wrap bv offset 1))))) ;; Unwrap a bstruct to a bytevector + offset + count. (define-syntax bstruct-unwrap (lambda (stx) (syntax-case stx () ((_ bs) (bstruct-descriptor-identifier? #') #`(match bs (($ bv offset n) (values bv offset n))))))) ;; Size/align queries. (define-syntax bstruct-sizeof (lambda (stx) (syntax-case stx () ((_ ) (non-opaque-bstruct-descriptor-identifier? #') (bstruct-descriptor-identifier-size #'))))) (define-syntax bstruct-alignof (lambda (stx) (syntax-case stx () ((_ ) (non-opaque-bstruct-descriptor-identifier? #') (bstruct-descriptor-identifier-align #'))))) ;; 'bstruct-pack!' and 'bstruct-unpack' allow for directly ;; interpreting bytevector contents as structured data. (define-syntax bstruct-pack! (lambda (stx) (define (flatten-elems stx) (append-map (lambda (stx) (syntax-case stx () ((-> root-elem sub-elems ...) (and (symbolic-match? ->) (identifier? #'root-elem)) (map (lambda (stx) (syntax-case stx () (((sub-elem ...) val) #'((root-elem sub-elem ...) val)))) (flatten-elems #'(sub-elems ...)))) (((elem ...) val) #'(((elem ...) val))) ((elem val) #'(((elem) val))))) stx)) (syntax-case stx () ((_ bv i elem ...) (non-opaque-bstruct-descriptor-identifier? #') (with-syntax (((((elem ...) val) ...) (flatten-elems #'(elem ...)))) #'(begin (( setter elem ...) bv i val) ...)))))) (define-syntax bstruct-unpack (lambda (stx) (syntax-case stx () ((_ bv i elem ...) (non-opaque-bstruct-descriptor-identifier? #') #`(values #,@(map (lambda (elem) (syntax-case elem () ((e ...) #'(( getter e ...) bv i)) (e #'(( getter e) bv i)))) #'(elem ...))))))) ;; Allocate a fresh bstruct that wraps a fresh bytevector big ;; enough to store the entire structure. (define-syntax bstruct-alloc (syntax-rules () ((_ ( n) (i elem ...) ...) (let* ((size (bstruct-sizeof )) (len (* size n)) (bv (make-bytevector len 0))) (bstruct-pack! bv (* size i) elem ...) ... (bstruct-wrap bv 0))) ((_ elem ...) (bstruct-alloc ( 1) (0 elem ...))))) ;; Return the value of some elements. (define-syntax bstruct-ref (syntax-rules () ((_ ( i) bs elem ...) ; array (match bs (($ bv (? u64? offset) (? u64? n)) (assert (u64? i) 'bstruct-ref) (check-size i n 'bstruct-ref) (let ((offset (+ offset (* (bstruct-sizeof ) i)))) (bstruct-unpack bv offset elem ...))))) ((_ bs elem ...) (bstruct-ref ( 0) bs elem ...)))) ;; Set the value of some elements. (define-syntax bstruct-set! (syntax-rules () ((_ ( i) bs elem ...) ; array (match bs (($ bv (? u64? offset) (? u64? n)) (assert (u64? i) 'bstruct-set!) (check-size i n 'bstruct-set!) (let ((offset (+ offset (* (bstruct-sizeof ) i)))) (bstruct-pack! bv offset elem ...))))) ((_ bs elem ...) (bstruct-set! ( 0) bs elem ...)))) ;; Imperative/functional struct copying. (define-syntax-rule (bstruct-copy! src dst) (match src (($ src-bv (? u64? src-offset) (? u64? src-n)) (match dst (($ dst-bv (? u64? dst-offset) (? u64? dst-n)) (check-size src-n (1+ dst-n) 'bstruct-copy!) (bytevector-copy! src-bv src-offset dst-bv dst-offset (* (bstruct-sizeof ) src-n))))))) (define-syntax-rule (bstruct-copy src) (match src (($ _ _ (? u64? n)) (let ((dst (bstruct-alloc ( n)))) (bstruct-copy! src dst) dst)))) (define-syntax define-bstruct-descriptor (lambda (stx) (define (build-bit-field stx) (syntax-case stx () ((name offset len signed?) #`(make-bit-field 'name offset len signed?)))) (define (build-struct-field stx) (syntax-case stx () ((name offset type) #`(make-struct-field 'name offset #,(build-type #'type))))) (define (build-union-field stx) (syntax-case stx () ((name type) #`(make-union-field 'name #,(build-type #'type))))) (define (build-type desc) (syntax-case desc () (id (identifier? #'id) desc) ((scalar size align native? type) (symbolic-match? scalar) #'(make-scalar size align native? 'type)) ((bits size align native? (field ...)) (symbolic-match? bits) (with-syntax (((field ...) (map build-bit-field #'(field ...)))) #'(make-bits size align native? (list field ...)))) ((struct size align (field ...)) (symbolic-match? struct) (with-syntax (((field ...) (map build-struct-field #'(field ...)))) #'(make-struct size align (list field ...)))) ((union size align (field ...)) (symbolic-match? union) (with-syntax (((field ...) (map build-union-field #'(field ...)))) #'(make-union size align (list field ...)))) ((array size align length desc) (symbolic-match? array) #`(make-array size align length #,(build-type #'desc))) ((pointer size align (recur _)) (and (symbolic-match? pointer) (symbolic-match? recur)) #'(make-pointer size align #f)) ((pointer size align void) (and (symbolic-match? pointer) (symbolic-match? void)) #'(make-pointer size align #f)) ((pointer size align desc) (symbolic-match? pointer) #`(make-pointer size align #,(build-type #'desc))))) (syntax-case stx () ;; Opaque types have no descriptor. ((_ id name (opaque)) (symbolic-match? opaque) #'(define id #f)) ((_ id name layout . kwargs) (with-syntax ((type (build-type #'layout))) #`(define id (make-bstruct-descriptor 'name type . kwargs))))))) (define (sizeof/syntax type) (syntax-case type () ((scalar size _ _ _) (symbolic-match? scalar) (syntax->datum #'size)) ((bits size _ _ _) (symbolic-match? bits) (syntax->datum #'size)) ((struct size _ _) (symbolic-match? struct) (syntax->datum #'size)) ((union size _ _) (symbolic-match? union) (syntax->datum #'size)) ((array size _ _ _) (symbolic-match? array) (syntax->datum #'size)) ((pointer size _ _) (symbolic-match? pointer) (syntax->datum #'size)) ((opaque) (symbolic-match? opaque) 0) (_ (bstruct-descriptor-identifier? type) (bstruct-descriptor-identifier-size type)))) (define (alignof/syntax type) (syntax-case type () ((scalar _ align _ _) (symbolic-match? scalar) (syntax->datum #'align)) ((bits _ align _ _) (symbolic-match? bits) (syntax->datum #'align)) ((struct _ align _) (symbolic-match? struct) (syntax->datum #'align)) ((union _ align _) (symbolic-match? union) (syntax->datum #'align)) ((array _ align _ _) (symbolic-match? array) (syntax->datum #'align)) ((pointer _ align _) (symbolic-match? pointer) (syntax->datum #'align)) ((opaque) (symbolic-match? opaque) 0) (_ (bstruct-descriptor-identifier? type) (bstruct-descriptor-identifier-align type)))) (define (expand-accessor proc stx id op type) (syntax-case stx () (() (proc id 0)) ; self reference ((elem ...) (let loop ((stx #'(elem ...)) (type type) (offset 0)) (syntax-case type () (id (bstruct-descriptor-identifier? #'id) ;; Recursively invoke macro for referenced type to produce ;; the accessor. (syntax-case stx () ((elem ...) (proc #`(id #,(datum->syntax #f op) elem ...) offset)))) ((scalar _ _ _ _) (symbolic-match? scalar) (syntax-case stx () (() (proc type offset)))) ((bits size _ native? (field ...)) (symbolic-match? bits) (syntax-case stx () ((e) (identifier? #'e) (let ((name (syntax->datum #'e))) (let field-lp ((fields #'(field ...))) (syntax-case fields () (((name* offset* len signed?) . rest) (if (identifier-eq? #'name* name) (proc #`(bit-field size offset* len signed? native?) offset) (field-lp #'rest))))))))) ((struct _ _ (field ...)) (symbolic-match? struct) (syntax-case stx () ((e elem ...) (identifier? #'e) (let ((name (syntax->datum #'e))) (let field-lp ((fields #'(field ...))) (syntax-case fields () (((name* offset* type*) . rest) (if (identifier-eq? #'name* name) (loop #'(elem ...) #'type* #`(+ #,offset offset*)) (field-lp #'rest))))))))) ((union _ _ (field ...)) (symbolic-match? union) (syntax-case stx () ((e elem ...) (identifier? #'e) (let ((name (syntax->datum #'e))) (let field-lp ((fields #'(field ...))) (syntax-case fields () (((name* type) . rest) (if (identifier-eq? #'name* name) (loop #'(elem ...) #'type offset) (field-lp #'rest))))))))) ((array _ _ length type) (symbolic-match? array) (syntax-case stx () ((i elem ...) (loop #'(elem ...) #'type #`(+ #,offset (* (let () ;; if 'i' is a constant then ;; these checks will be elided ;; by the compiler. (assert (u64? i) 'bstruct-accessor) (assert (< -1 i length) 'bstruct-accessor) i) #,(sizeof/syntax #'type))))))) ;; Void/opaque pointers cannot be dereferenced. ((pointer _ _ void) (and (symbolic-match? pointer) (symbolic-match? void)) (syntax-case stx () (() (proc type offset)))) ((pointer _ _ opaque) (and (symbolic-match? pointer) (opaque-bstruct-descriptor-identifier? #'opaque)) (syntax-case stx () (() (proc type offset)))) ((pointer _ _ type*) (symbolic-match? pointer) (let ((type* (syntax-case #'type* () ((recur type*) (symbolic-match? recur) #'type*) (_ #'type*)))) (define (expand-pointer-accessor stx) (syntax-case stx () (() (proc type offset)) ;; Pointer dereference. (((* index) elem ...) (symbolic-match? *) (with-syntax ((e (datum->syntax #f (target-endianness))) (ptr-size (target-word-size)) (size (sizeof/syntax type*)) (body (syntax-case type* () (id (identifier? #'id) (proc #`(id #,(datum->syntax #f op) elem ...) 0)) (_ (loop #'(elem ...) type* 0))))) ;; 'bv' and 'i' are the lexical variables ;; containing the bytevector and offset for ;; getting/setting. Every time we encounter a ;; pointer dereference we need to shadow the old ;; variables with new ones. #`(let* ((base (bytevector-sint-ref bv (+ i #,offset) 'e ptr-size)) (address (+ base (* size index))) (ptr (ffi:make-pointer address)) (bv (ffi:pointer->bytevector ptr size)) (i 0)) body))) ;; Pointer dereference with implied index of 0. ((* elem ...) (symbolic-match? *) (expand-pointer-accessor #'((* 0) elem ...))))) (expand-pointer-accessor stx)))))))) (define (expand-offset elems id type) #`(lambda (bv i) #,(expand-accessor (lambda (type offset) (with-syntax ((offset offset)) (syntax-case type () (id* (bstruct-descriptor-identifier? #'id*) #'(values bv offset)) ((pointer _ _ _) (symbolic-match? pointer) #'(values bv offset)) ((scalar size align native? type) (symbolic-match? scalar) #'(values bv offset)) ((bit-field size start len signed? native?) (symbolic-match? bit-field) (syntax-violation 'bstruct->pointer "cannot create pointer to bit field" id elems)) (sub-offset #'(sub-offset bv (+ i offset)))))) elems id 'offset type))) ;; Primitive getter/setter helpers (define (ref/endianness proc endianness) #`(lambda (bv i) (#,proc bv i #,endianness))) (define (uint-ref size endianness) #`(lambda (bv i) (bytevector-uint-ref bv i '#,(datum->syntax #f endianness) #,size))) (define (sint-ref size endianness) #`(lambda (bv i) (bytevector-sint-ref bv i '#,(datum->syntax #f endianness) #,size))) ;; Scalar types are divided into two categories: machine ;; indepenent and machine dependent. The machine independent ;; types (i32, f32, etc.) have a known size and alignment on all ;; platforms. The machine dependent types have a size and ;; alignment that can vary depending on the ABI of the system that ;; is compiling the code. (define (abi-ref type e) ; e for endianness (match type ('uint (uint-ref (sizeof* 'uint) e)) ('int (sint-ref (sizeof* 'int) e)) ('ulong (uint-ref (sizeof* 'ulong) e)) ('long (sint-ref (sizeof* 'long) e)) ('ushort (uint-ref (sizeof* 'ushort) e)) ('short (sint-ref (sizeof* 'short) e)) ('size_t (uint-ref (sizeof* 'size_t) e)) ('ssize_t (sint-ref (sizeof* 'ssize_t) e)) ('ptrdiff_t (sint-ref (sizeof* 'ptrdiff_t) e)) ('intptr_t (sint-ref (sizeof* 'intptr_t) e)) ('uintptr_t (uint-ref (sizeof* 'uintptr_t) e)))) (define (primitive-getter native? type) (if native? (match type ('u8 #'bytevector-u8-ref) ('s8 #'bytevector-s8-ref) ('u16 #'bytevector-u16-native-ref) ('s16 #'bytevector-s16-native-ref) ('u32 #'bytevector-u32-native-ref) ('s32 #'bytevector-s32-native-ref) ('u64 #'bytevector-u64-native-ref) ('s64 #'bytevector-s64-native-ref) ('f32 #'bytevector-ieee-single-native-ref) ('f64 #'bytevector-ieee-double-native-ref) (_ (abi-ref type (target-endianness)))) (let ((e (non-target-endianness))) (match type ('u8 #'bytevector-u8-ref) ('s8 #'bytevector-s8-ref) ('u16 (ref/endianness #'bytevector-u16-ref e)) ('s16 (ref/endianness #'bytevector-s16-ref e)) ('u32 (ref/endianness #'bytevector-u32-ref e)) ('s32 (ref/endianness #'bytevector-s32-ref e)) ('u64 (ref/endianness #'bytevector-u64-ref e)) ('s64 (ref/endianness #'bytevector-s64-ref e)) ('f32 (ref/endianness #'bytevector-ieee-single-ref e)) ('f64 (ref/endianness #'bytevector-ieee-double-ref e)) (_ (abi-ref type e)))))) (define (expand-getter elems id type) #`(lambda (bv i) #,(expand-accessor (lambda (type offset) (with-syntax ((offset offset)) (syntax-case type () (id* (bstruct-descriptor-identifier? #'id*) #'(make-bstruct id* bv (+ i offset))) ((pointer _ _ _) (symbolic-match? pointer) #`(ffi:make-pointer (bytevector-sint-ref bv (+ i offset) '#,(datum->syntax #f (target-endianness)) #,(target-word-size)))) ((scalar size align native? type) (symbolic-match? scalar) (with-syntax ((getter (primitive-getter (syntax->datum #'native?) (syntax->datum #'type)))) #'(getter bv (+ i offset)))) ((bit-field size start len signed? native?) (symbolic-match? bit-field) (let ((type (if (syntax->datum #'signed?) (match (syntax->datum #'size) (1 's8) (2 's16) (4 's32) (8 's64)) (match (syntax->datum #'size) (1 'u8) (2 'u16) (4 'u32) (8 'u64))))) (with-syntax ((getter (primitive-getter (syntax->datum #'native?) type))) #'(let ((x (ash (logand (getter bv (+ i offset)) (ash (1- (ash 1 len)) start)) (- start)))) (if signed? (centered-remainder x (ash 1 len)) x))))) (sub-get #'(sub-get bv (+ i offset)))))) elems id 'getter type))) (define (set!/endianness proc endianness) #`(lambda (bv i x) (#,proc bv i x #,endianness))) (define (uint-set! size endianness) #`(lambda (bv i x) (bytevector-uint-set! bv i x '#,(datum->syntax #f endianness) #,size))) (define (sint-set! size endianness) #`(lambda (bv i x) (bytevector-sint-set! bv i x '#,(datum->syntax #f endianness) #,size))) (define (abi-set! type e) (match type ('uint (uint-set! (sizeof* 'uint) e)) ('int (sint-set! (sizeof* 'int) e)) ('ulong (uint-set! (sizeof* 'ulong) e)) ('long (sint-set! (sizeof* 'long) e)) ('ushort (uint-set! (sizeof* 'ushort) e)) ('short (sint-set! (sizeof* 'short) e)) ('size_t (uint-set! (sizeof* 'size_t) e)) ('ssize_t (sint-set! (sizeof* 'ssize_t) e)) ('ptrdiff_t (sint-set! (sizeof* 'ptrdiff_t) e)) ('intptr_t (sint-set! (sizeof* 'intptr_t) e)) ('uintptr_t (uint-set! (sizeof* 'uintptr_t) e)))) (define (primitive-setter native? type) (if native? (match type ('u8 #'bytevector-u8-set!) ('s8 #'bytevector-s8-set!) ('u16 #'bytevector-u16-native-set!) ('s16 #'bytevector-s16-native-set!) ('u32 #'bytevector-u32-native-set!) ('s32 #'bytevector-s32-native-set!) ('u64 #'bytevector-u64-native-set!) ('s64 #'bytevector-s64-native-set!) ('f32 #'bytevector-ieee-single-native-set!) ('f64 #'bytevector-ieee-double-native-set!) (_ (abi-set! type (target-endianness)))) (let ((e (non-target-endianness))) (match type ('u8 #'bytevector-u8-set!) ('s8 #'bytevector-s8-set!) ('u16 (set!/endianness #'bytevector-u16-set! e)) ('s16 (set!/endianness #'bytevector-s16-set! e)) ('u32 (set!/endianness #'bytevector-u32-set! e)) ('s32 (set!/endianness #'bytevector-s32-set! e)) ('u64 (set!/endianness #'bytevector-u64-set! e)) ('s64 (set!/endianness #'bytevector-s64-set! e)) ('f32 (set!/endianness #'bytevector-ieee-single-set! e)) ('f64 (set!/endianness #'bytevector-ieee-double-set! e)) (_ (abi-set! type e)))))) (define (expand-setter elems id type size) #`(lambda (bv i x) #,(expand-accessor (lambda (type offset) (with-syntax ((offset offset)) (syntax-case type () (id (bstruct-descriptor-identifier? #'id) #`(match x (($ id src j) (bytevector-copy! src j bv (+ i offset) #,(bstruct-descriptor-identifier-size type))))) ((pointer _ _ _) (symbolic-match? pointer) #`(bytevector-sint-set! bv (+ i offset) (ffi:pointer-address x) '#,(datum->syntax #f (target-endianness)) #,(target-word-size))) ((scalar size align native? type) (symbolic-match? scalar) (with-syntax ((setter (primitive-setter (syntax->datum #'native?) (syntax->datum #'type)))) #'(setter bv (+ i offset) x))) ((bit-field size start len signed? native?) (symbolic-match? bit-field) (let ((type (if (syntax->datum #'signed?) (match (syntax->datum #'size) (1 's8) (2 's16) (4 's32) (8 's64)) (match (syntax->datum #'size) (1 'u8) (2 'u16) (4 'u32) (8 'u64)))) (native? (syntax->datum #'native?))) (with-syntax ((getter (primitive-getter native? type)) (setter (primitive-setter native? type))) ;; The mask keeps the bits that are *not* part of ;; the bit field for the existing value. #'(let* ((mask (lognot (ash (1- (ash 1 len)) start))) (rest (logand (getter bv (+ i offset)) mask))) (define (fail) (error "bit field value out of range" x)) (if signed? (if (<= (ash -1 (1- len)) x (1- (ash 1 (1- len)))) ;; Convert to unsigned representation. (let ((u (logand (1- (ash 1 len)) x))) (setter bv (+ i offset) (logior rest (ash u start)))) (fail)) (if (<= 0 x (1- (ash 1 len))) (setter bv (+ i offset) (logior rest (ash x start))) (fail))))))) (sub-set! #'(sub-set! bv (+ i offset) x))))) elems id 'setter type))) (define-syntax define-bstruct-macro (lambda (stx) (syntax-case stx () ((_ id type-id (opaque)) (symbolic-match? opaque) #'(define-syntax id (lambda (stx) #((bstruct? . #t) (bstruct-opaque? . #t)) (syntax-case stx () ;; Identifier syntax to provide the illusion that ;; this macro is just a record type descriptor. (self (identifier? #'self) #'type-id))))) ((_ id type-id size align desc . kwargs) #'(define-syntax id (lambda (stx) #((bstruct? . #t) (bstruct-opaque? . #f) (bstruct-size . size) (bstruct-align . align)) (let ((type #'desc)) (syntax-case stx () (self (identifier? #'self) #'type-id) ;; Private interface for code generation. ((_ offset . elems) (symbolic-match? offset) (expand-offset #'elems #'id type)) ((_ getter . elems) (symbolic-match? getter) (expand-getter #'elems #'id type)) ((_ setter . elems) (symbolic-match? setter) (expand-setter #'elems #'id type size)))))))))) (define-syntax define-bstruct* (lambda (stx) ;; Types can be recursive by referencing a type within the same ;; type group, possibly itself. So, we maintain a list of pointer ;; type accessor forms that need to be patched with a cyclical ;; reference *after* all the types are defined. (define recursive-pointers '()) (define primitives '(u8 s8 u16 s16 u32 s32 u64 s64 f32 f64 int uint long ulong short ushort size_t ssize_t ptrdiff_t intptr_t uintptr_t)) (define bit-field-primitives (remove (lambda (t) (memq t '(f32 f64))) primitives)) (define (target-endianness? e) (eq? e (target-endianness))) (define (resolve-endianness e) (match e ('native (target-endianness)) ('non-native (non-target-endianness)) (_ e))) (define (compute-layout expr accessor group-ids packed? endianness) (syntax-case expr () ;; Modifiers ((packed expr) (symbolic-match? packed) (compute-layout #'expr accessor group-ids #t endianness)) ((unpacked expr) (symbolic-match? unpacked) (compute-layout #'expr accessor group-ids #f endianness)) ((endian e expr) (and (symbolic-match? endian) (identifier-memq? #'e '(native non-native big little))) (compute-layout #'expr accessor group-ids packed? (resolve-endianness (syntax->datum #'e)))) ;; Previously defined types (type-id (bstruct-descriptor-identifier? #'type-id) #'type-id) ((opaque) (symbolic-match? opaque) expr) (primitive (identifier-memq? #'primitive primitives) (let ((type (syntax->datum #'primitive))) #`(scalar #,(sizeof* type) #,(alignof* type) #,(target-endianness? endianness) primitive))) ((bits field ...) (symbolic-match? bits) (let lp ((field-stx #'(field ...)) (fields '()) (bits 0)) (syntax-case field-stx () (() (let () (define (finish size) #`(bits #,size #,size #,(target-endianness? endianness) #,(reverse fields))) (cond ((<= bits 8) (finish 1)) ((<= bits 16) (finish 2)) ((<= bits 32) (finish 4)) ((<= bits 64) (finish 8)) (else (syntax-violation 'define-bstruct "bit fields exceed maximum of 64 bits" stx))))) (((underscore len) . rest) (and (identifier-eq? #'underscore '_) (exact-integer? (syntax->datum #'len)) (positive? (syntax->datum #'len))) (lp #'rest fields (+ bits (syntax->datum #'len)))) (((name len sign) . rest) (and (identifier? #'name) (identifier-memq? #'sign '(u s)) (exact-integer? (syntax->datum #'len)) (positive? (syntax->datum #'len))) (let ((signed? (identifier-eq? #'sign 's))) (lp #'rest (cons #`(name #,bits len #,signed?) fields) (+ bits (syntax->datum #'len)))))))) ((struct field ...) (and (symbolic-match? struct) (not (null? #'(field ...)))) (let lp ((stx #'(field ...)) (fields '()) (offset 0) (align 0)) (syntax-case stx () (() ;; Round up to a multiple of align to get final ;; size. (let ((size (* (ceiling (/ offset align)) align))) #`(struct #,size #,align #,(reverse fields)))) (((name expr) . rest) (identifier? #'name) (let* ((type (compute-layout #'expr #`(struct-field-ref #,accessor 'name) group-ids packed? endianness)) (field-align (alignof/syntax type)) (padding (if packed? 0 (modulo (- field-align (modulo offset field-align)) field-align))) (offset (+ offset padding)) (align (max align field-align))) (lp #'rest (syntax-case #'name () (underscore (identifier-eq? #'underscore '_) fields) (_ (cons #`(name #,offset #,type) fields))) (+ offset (sizeof/syntax type)) align)))))) ((union field ...) (and (symbolic-match? union) (not (null? #'(field ...)))) (let loop ((stx #'(field ...)) (fields '()) (size 0) (align 0)) (syntax-case stx () (() #`(union #,size #,align #,(reverse fields))) (((underscore expr) . rest-exprs) (identifier-eq? #'underscore '_) (let ((type (compute-layout #'expr #f group-ids packed? endianness))) (loop #'rest-exprs fields (max size (sizeof/syntax type)) align))) (((name expr) . rest) (identifier? #'name) (let ((type (compute-layout #'expr #`(struct-field-ref #,accessor 'name) group-ids packed? endianness))) (loop #'rest (cons #`(name #,type) fields) (max size (sizeof/syntax type)) (max align (alignof/syntax type)))))))) ((array length expr) (and (symbolic-match? array) (exact-integer? (syntax->datum #'length)) (positive? (syntax->datum #'length))) (let ((length (syntax->datum #'length)) (type (compute-layout #'expr #`(array-type #,accessor) group-ids packed? endianness))) #`(array #,(* (sizeof/syntax type) length) #,(alignof/syntax type) #,length #,type))) ((ptr expr) (identifier-eq? #'ptr '*) ;; TODO: pointer size depends on the target. (let ((size (ffi:sizeof '*)) (align (ffi:alignof '*))) (let loop ((expr #'expr)) (syntax-case expr () (void (symbolic-match? void) #`(pointer #,size #,align void)) ;; Primitive pointer (prim (identifier-memq? #'prim primitives) (let ((type (compute-layout #'prim #f group-ids packed? endianness))) #`(pointer #,size #,align #,type))) ;; Pointer to a pointer ((ptr expr) (identifier-eq? #'ptr '*) #`(pointer #,size #,align #,(loop #'expr))) ;; Recursive reference to a type within this type group. (type-id (any (lambda (id) (bound-identifier=? #'type-id id)) group-ids) (begin (set! recursive-pointers (cons (list accessor #'type-id) recursive-pointers)) #`(pointer #,size #,align (recur type-id)))) ;; Reference to a type outside of this type group. (type-id (bstruct-descriptor-identifier? #'type-id) #`(pointer #,size #,align type-id)))))))) (define (compute-layout* expr id group-ids) (compute-layout expr #`(bstruct-descriptor-type #,id) group-ids #f (target-endianness))) (define (type-descriptor-id id) (datum->syntax id (symbol-append (string->symbol "% bstruct-descriptor-") (syntax->datum id)))) (syntax-case stx () ((_ (id expr . kwargs) ...) (with-syntax (((type ...) (map (lambda (id* expr) (compute-layout* expr id* #'(id ...))) #'(id ...) #'(expr ...))) ((type-id ...) (map type-descriptor-id #'(id ...))) (((recur-accessor recur-id) ...) recursive-pointers)) (with-syntax ((((size align) ...) (map (lambda (type) (list (sizeof/syntax type) (alignof/syntax type))) #'(type ...)))) #'(begin ;; First, define the type descriptors. (define-bstruct-descriptor type-id id type . kwargs) ... ;; Then tie the knots for recursive pointer types. (set-pointer-type! recur-accessor recur-id) ... ;; Finally, define the wrapper macros. (define-bstruct-macro id type-id size align type . kwargs) ...))))))) (define-syntax define-bstruct (lambda (stx) (define (distinct? lst) (let lp ((remaining lst) (seen '())) (match remaining (() #t) ((x . rest) (if (memq x seen) #f (lp rest (cons x seen))))))) (syntax-case stx () ;; Type group definition. Types defined in the same group can ;; contain recursive pointer references to each other. ((_ (id . args) ...) (and (not (null? #'(id ...))) (every identifier? #'(id ...)) ;; Duplicate ids not allowed in a type group. (distinct? (syntax->datum #'(id ...)))) ;; Handle the special case of opaque types having empty ;; specifications. (with-syntax ((((expr . kwargs) ...) (map (lambda (stx) (syntax-case stx () (() #'((opaque))) (_ stx))) #'(args ...)))) #'(define-bstruct* (id expr . kwargs) ...))) ;; A single type definition is a type group of one. ((_ id . args) (identifier? #'id) #'(define-bstruct (id . args))))))