diff options
author | David Thompson <dthompson2@worcester.edu> | 2024-10-03 08:54:40 -0400 |
---|---|---|
committer | David Thompson <dthompson2@worcester.edu> | 2024-11-10 16:41:29 -0500 |
commit | 416986846fd54f4117efb535a685b68cbd90efc8 (patch) | |
tree | 20450885e7b4f7b745d2d374fb307656b9a38c1a /bstruct.scm |
First commit!
Diffstat (limited to 'bstruct.scm')
-rw-r--r-- | bstruct.scm | 1210 |
1 files changed, 1210 insertions, 0 deletions
diff --git a/bstruct.scm b/bstruct.scm new file mode 100644 index 0000000..2751f07 --- /dev/null +++ b/bstruct.scm @@ -0,0 +1,1210 @@ +;;; guile-bstruct -- Binary structures for Guile +;;; Copyright © 2024 David Thompson <dthompson2@worcester.edu> +;;; +;;; Licensed under the Apache License, Version 2.0 (the "License"); +;;; you may not use this file except in compliance with the License. +;;; You may obtain a copy of the License at +;;; +;;; http://www.apache.org/licenses/LICENSE-2.0 +;;; +;;; Unless required by applicable law or agreed to in writing, software +;;; distributed under the License is distributed on an "AS IS" BASIS, +;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +;;; See the License for the specific language governing permissions and +;;; limitations under the License. + +;;; Commentary: +;; +;; A bstruct is a data type that encapsulates a bytevector and an +;; offset which interprets that bytevector based on a given layout. +;; Bstructs are useful for manipulating C structs when using the FFI, +;; or interpreting/manipulating packed binary data such as GPU vertex +;; buffers, or for data types that benefit from unboxed math +;; optimizations such as vectors and matrices. +;; +;; Inspired by guile-opengl's define-packed-struct and based on +;; "Ftypes: Structured foreign types" by Andy Keep and R. Kent Dybvig. +;; +;; http://scheme2011.ucombinator.org/papers/Keep2011.pdf +;; +;; Features: +;; +;; - Efficiency through macro magic. Procedural macros generate tasty +;; code for the compiler to munch on and emit efficient bytecode. +;; Runtime checks are reduced to the bare minimum. +;; +;; - Raw bytevector access mode. Easily read/write structured data +;; from/to raw bytevectors without going through an intermediary +;; struct. Very useful for batch processing in situations when the +;; overhead of creating wrapper structs would hinder throughput. +;; +;; - Cache friendly. In performance sensitive code, many bstructs can +;; be stored in contiguous memory by pre-allocating a large bytevector +;; for the underlying storage. Individual bstruct handles simply +;; point at different offsets. +;; +;;; Code: + +(define-module (bstruct) + #:use-module (ice-9 exceptions) + #:use-module (ice-9 match) + #:use-module (rnrs bytevectors) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-9) + #:use-module (srfi srfi-11) + #:use-module (system base target) + #:use-module ((system foreign) #:prefix ffi:) + #:use-module (system syntax) + #:export (bstruct? + bstruct-=? + bstruct-type + bstruct-length + bstruct->sexp + bstruct->pointer + pointer->bstruct + bstruct-wrap + bstruct-unwrap + bstruct-alloc + bstruct-sizeof + bstruct-alignof + bstruct-ref + bstruct-set! + bstruct-unpack + bstruct-pack! + bstruct-copy + bstruct-copy! + define-bstruct)) + +(define-record-type <scalar> + (make-scalar size alignment native? type) + scalar? + (size scalar-size) + (alignment scalar-alignment) + (native? scalar-native?) ; native endianness? + (type scalar-type)) + +(define-record-type <struct-field> + (make-struct-field name offset type) + struct-field? + (name struct-field-name) + (offset struct-field-offset) + (type struct-field-type)) + +(define-record-type <struct> + (make-struct size alignment fields) + %struct? + (size struct-size) + (alignment struct-alignment) + (fields struct-fields)) + +(define-record-type <union-field> + (make-union-field name type) + union-field? + (name union-field-name) + (type union-field-type)) + +(define-record-type <union> + (make-union size alignment fields) + union? + (size union-size) + (alignment union-alignment) + (fields union-fields)) + +(define-record-type <array> + (make-array size alignment length type) + array? + (size array-size) + (alignment array-alignment) + (length array-length) + (type array-type)) + +(define-record-type <pointer> + (make-pointer size alignment type) + pointer? + (size pointer-size) + (alignment pointer-alignment) + ;; Mutable for recursive types. + (type pointer-type set-pointer-type!)) + +(define-record-type <opaque> + (make-opaque) + opaque?) + +;; TODO: functions +;; TODO: bitfields + +(define (struct-field-ref struct name) + (and=> (find (match-lambda + (($ <struct-field> name*) + (eq? name name*))) + (struct-fields struct)) + struct-field-type)) + +(define (union-field-ref struct name) + (and=> (find (match-lambda + (($ <union-field> name*) + (eq? name name*))) + (union-fields struct)) + union-field-type)) + +(define (bstruct-type? obj) + (or (scalar? obj) + (%struct? obj) + (union? obj) + (array? obj) + (pointer? obj) + (opaque? obj))) + +(define (sizeof type) + (match type + ((or ($ <scalar> size) + ($ <struct> size) + ($ <union> size) + ($ <array> size) + ($ <pointer> size)) + size) + (($ <opaque>) + (raise-exception + (make-exception (make-exception-with-message + "cannot get size of opaque type") + (make-exception-with-origin 'sizeof) + (make-exception-with-irritants (list type))))))) + +(define (alignof type) + (match type + ((or ($ <scalar> _ alignment) + ($ <struct> _ alignment) + ($ <union> _ alignment) + ($ <array> _ alignment) + ($ <pointer> _ alignment)) + alignment) + (($ <opaque>) + (raise-exception + (make-exception (make-exception-with-message + "cannot get alignment of opaque type") + (make-exception-with-origin 'sizeof) + (make-exception-with-irritants (list type))))))) + +(define-syntax-rule (assert expr who) + (unless expr + (raise-exception + (make-exception (make-assertion-failure) + (make-exception-with-origin who) + (make-exception-with-irritants '(expr)))))) + +(define-syntax-rule (check-size i n who) + (assert (and (>= i 0) (< i n)) who)) + +(define-inlinable (u64? x) + (and (exact-integer? x) (<= 0 x (ash 1 64)))) + +;; Bstructs form a shallow vtable hierarchy. +(define <bstruct-descriptor> + (make-vtable (string-append standard-vtable-fields "pwpw") + (lambda (desc port) + (format port "#<bstruct-descriptor ~a>" + (object-address desc))))) + +(define (bstruct-descriptor-name descriptor) + (struct-ref descriptor vtable-offset-user)) + +(define (bstruct-descriptor-type descriptor) + (struct-ref descriptor (+ vtable-offset-user 1))) + +(define (bstruct-descriptor-sizeof descriptor) + (sizeof (bstruct-descriptor-type descriptor))) + +(define (print-bstruct bs port) + (format port "#<~a ~a>" + (bstruct-descriptor-name (struct-vtable bs)) + (bstruct->sexp bs))) + +(define* (make-bstruct-descriptor name type #:key (printer print-bstruct)) + (assert (bstruct-type? type) 'make-bstruct-descriptor) + (make-struct/no-tail <bstruct-descriptor> + (make-struct-layout "pwpwpw") + printer name type)) + +(define (bstruct-descriptor? obj) + (and (struct? obj) (eq? (struct-vtable obj) <bstruct-descriptor>))) + +(define (%bstruct? obj) + (and (struct? obj) (bstruct-descriptor? (struct-vtable obj)))) + +(define (bstruct-bytevector bs) + (assert (%bstruct? bs) 'bstruct-bytevector) + (struct-ref bs 0)) + +(define (bstruct-offset bs) + (assert (%bstruct? bs) 'bstruct-bytevector) + (struct-ref bs 1)) + +(define (bstruct-length bs) + (assert (%bstruct? bs) 'bstruct-bytevector) + (struct-ref bs 2)) + +(define (bstruct-type bs) + (assert (%bstruct? bs) 'bstruct-bytevector) + (bstruct-descriptor-type (struct-vtable bs))) + +;; Bstructs are composed of a type descriptor, a bytevector that +;; provides storage, an offset pointing to the start of the struct +;; data within that bytevector, and the number of contiguous structs +;; within. +;; +;; TODO: We could use Guile's bytevector slices here, however they +;; were only introduced in 3.0.9 so we can't rely on them. +(define (%make-bstruct descriptor bv offset n) + (make-struct/no-tail descriptor bv offset n)) + +(define (make-bstruct descriptor bv offset n) + (assert (bstruct-descriptor? descriptor) 'make-bstruct) + (assert (bytevector? bv) 'make-bstruct) + (assert (exact-integer? offset) 'make-bstruct) + (assert (>= offset 0) 'make-bstruct) + (assert (<= (+ offset (* (bstruct-descriptor-sizeof descriptor) n)) + (bytevector-length bv)) + 'make-bstruct) + (%make-bstruct descriptor bv offset n)) + +;; Platform ABI details +(define (non-target-endianness) + (if (eq? (target-endianness) (endianness little)) + (endianness big) + (endianness little))) + +(define (sizeof* type) + (match type + ((or 'u8 's8) 1) + ((or 'u16 's16) 2) + ((or 'u32 's32 'f32) 4) + ((or 'u64 's64 'f64) 8) + (_ + (if (string=? %host-type (target-type)) + (match type + ('uint (ffi:sizeof ffi:unsigned-int)) + ('int (ffi:sizeof ffi:int)) + ('ulong (ffi:sizeof ffi:unsigned-long)) + ('long (ffi:sizeof ffi:long)) + ('ushort (ffi:sizeof ffi:unsigned-short)) + ('short (ffi:sizeof ffi:short)) + ('size_t (ffi:sizeof ffi:size_t)) + ('ssize_t (ffi:sizeof ffi:ssize_t)) + ('ptrdiff_t (ffi:sizeof ffi:ptrdiff_t)) + ('intptr_t (ffi:sizeof ffi:intptr_t)) + ('uintptr_t (ffi:sizeof ffi:uintptr_t))) + ;; FIXME: Fill in with proper ABI details. We will lazily + ;; evaluate this work when we hit a problem in a cross build. + (match type + ('uint 4) + ('int 4) + ('ulong 8) + ('long 8) + ('ushort 2) + ('short 2) + ('size_t (target-word-size)) + ('ssize_t (target-word-size)) + ('ptrdiff_t (target-word-size)) + ('intptr_t (target-word-size)) + ('uintptr_t (target-word-size))))))) + +(define (alignof* type) + (match type + ((or 'u8 's8) 1) + ((or 'u16 's16) 2) + ((or 'u32 's32 'f32) 4) + ((or 'u64 's64 'f64) 8) + (_ + (if (string=? %host-type (target-type)) + (match type + ('uint (ffi:sizeof ffi:unsigned-int)) + ('int (ffi:sizeof ffi:int)) + ('ulong (ffi:sizeof ffi:unsigned-long)) + ('long (ffi:sizeof ffi:long)) + ('ushort (ffi:sizeof ffi:unsigned-short)) + ('short (ffi:sizeof ffi:short)) + ('size_t (ffi:sizeof ffi:size_t)) + ('ssize_t (ffi:sizeof ffi:ssize_t)) + ('ptrdiff_t (ffi:sizeof ffi:ptrdiff_t)) + ('intptr_t (ffi:sizeof ffi:intptr_t)) + ('uintptr_t (ffi:sizeof ffi:uintptr_t)) + ('* (ffi:sizeof '*))) + (match type + ('uint 4) + ('int 4) + ('ulong 8) + ('long 8) + ('ushort 2) + ('short 2) + ('size_t (target-word-size)) + ('ssize_t (target-word-size)) + ('ptrdiff_t (target-word-size)) + ('intptr_t (target-word-size)) + ('uintptr_t (target-word-size)) + ('* (target-word-size))))))) + +;; It is useful to see bstructs in s-expression form when working +;; at the REPL. +(define (bstruct->sexp bs) + (let ((bv (bstruct-bytevector bs))) + (let loop ((type (bstruct-type bs)) (offset (bstruct-offset bs))) + (match type + ((? bstruct-descriptor? desc) + (loop (bstruct-descriptor-type desc) offset)) + (($ <scalar> _ _ native? type) + (let ((e (if native? (target-endianness) (non-target-endianness)))) + (match type + ('u8 (bytevector-u8-ref bv offset)) + ('s8 (bytevector-s8-ref bv offset)) + ('u16 (bytevector-u16-ref bv offset e)) + ('s16 (bytevector-s16-ref bv offset e)) + ('u32 (bytevector-u32-ref bv offset e)) + ('s32 (bytevector-s32-ref bv offset e)) + ('u64 (bytevector-u64-ref bv offset e)) + ('s64 (bytevector-s64-ref bv offset e)) + ('f32 (bytevector-ieee-single-ref bv offset e)) + ('f64 (bytevector-ieee-double-ref bv offset e)) + ('uint (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-int))) + ('int (bytevector-sint-ref bv offset e (ffi:sizeof ffi:int))) + ('ulong (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-long))) + ('long (bytevector-sint-ref bv offset e (ffi:sizeof ffi:long))) + ('ushort (bytevector-uint-ref bv offset e (ffi:sizeof ffi:unsigned-short))) + ('short (bytevector-sint-ref bv offset e (ffi:sizeof ffi:short))) + ('size_t (bytevector-uint-ref bv offset e (ffi:sizeof ffi:size_t))) + ('ssize_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:ssize_t))) + ('ptrdiff_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:ptrdiff_t))) + ('intptr_t (bytevector-sint-ref bv offset e (ffi:sizeof ffi:intptr_t))) + ('uintptr_t (bytevector-uint-ref bv offset e (ffi:sizeof ffi:uintptr_t)))))) + (($ <struct> _ _ fields) + `(struct ,@(map (match-lambda + (($ <struct-field> name offset* type) + (list name (loop type (+ offset offset*))))) + fields))) + (($ <union> _ _ fields) + `(union ,@(map (match-lambda + (($ <union-field> name type) + (list name (loop type offset)))) + fields))) + (($ <array> _ _ length type) + (let ((size (sizeof type))) + `(array ,@(map (lambda (i) + (loop type (+ offset (* i size)))) + (iota length))))) + (($ <pointer>) + `(* ,(bytevector-uint-ref bv offset (native-endianness) + (ffi:sizeof '*)))))))) + +;; Macro helpers that use metadata attached to bstruct syntax +;; transformers. +(define (bstruct-descriptor-identifier? id) + (and (identifier? id) + (let-values (((kind val) (syntax-local-binding id))) + (and (eq? kind 'macro) + (procedure-property val 'bstruct?))))) + +(define (non-opaque-bstruct-descriptor-identifier? id) + (and (identifier? id) + (let-values (((kind val) (syntax-local-binding id))) + (and (eq? kind 'macro) + (procedure-property val 'bstruct?) + (not (procedure-property val 'bstruct-opaque?)))))) + +(define (bstruct-descriptor-identifier-size id) + (let-values (((_ transformer) (syntax-local-binding id))) + (procedure-property transformer 'bstruct-size))) + +(define (bstruct-descriptor-identifier-alignment id) + (let-values (((_ transformer) (syntax-local-binding id))) + (procedure-property transformer 'bstruct-alignment))) + +;; A predicate that can answer the questions: +;; 1) Is this *any kind* of bstruct? +;; 2) Is this *a specific kind* of bstruct? +(define-syntax bstruct? + (lambda (stx) + (syntax-case stx () + (x + (identifier? #'x) + #'(case-lambda + ((obj) (%bstruct? obj)) + ((<type> obj) + (match obj + (($ <type>) #t) + (_ #f))))) + ((_ obj) + #'(%bstruct? obj)) + ((_ <type> obj) + (bstruct-descriptor-identifier? #'<type>) + #'(match obj + (($ <type>) #t) + (_ #f)))))) + +(define-syntax bstruct-=? + (lambda (stx) + (syntax-case stx () + ((_ <type> a b) + (bstruct-descriptor-identifier? #'<type>) + #'(match a + (($ <type> bv-a offset-a) + (match b + (($ <type> bv-b offset-b) + (let ((n (bstruct-sizeof <type>))) + (let loop ((i 0)) + (cond + ((= i n) #t) + ((= (bytevector-u8-ref bv-a (+ offset-a i)) + (bytevector-u8-ref bv-b (+ offset-b i))) + (loop (+ i 1))) + (else #f)))))))))))) + +;; Create a pointer to the bstruct or some element within the bstruct. +(define-syntax bstruct->pointer + (lambda (stx) + (syntax-case stx () + ((_ (<type> i) bs) + (bstruct-descriptor-identifier? #'<type>) + #'(match bs + (($ <type> bv (? u64? offset)) + (ffi:bytevector->pointer bv offset)))) + ((_ (<type> i) bs (elem ...)) + (bstruct-descriptor-identifier? #'<type>) + #'(match bs + (($ <type> bv (? u64? offset) (? u64? n)) + (check-size i n 'bstruct->pointer) + (let ((offset (+ offset (* (bstruct-sizeof <type>) i)))) + (call-with-values (lambda () ((<type> offset elem ...) bv offset)) + ffi:bytevector->pointer))))) + ((_ (<type> i) bs elem) + #'(bstruct->pointer (<type> i) bs (elem))) + ((_ <type> bs) + #'(bstruct->pointer (<type> 0) bs)) + ((_ <type> bs (elem ...)) + #'(bstruct->pointer (<type> 0) bs (elem ...))) + ((_ <type> bs elem) + #'(bstruct->pointer (<type> 0) bs (elem)))))) + +(define-syntax pointer->bstruct + (lambda (stx) + (syntax-case stx () + ((_ <type> ptr n) + (non-opaque-bstruct-descriptor-identifier? #'<type>) + #'(let ((size (* (bstruct-sizeof <type>) n))) + (make-bstruct <type> (ffi:pointer->bytevector ptr size) 0 n))) + ((_ <type> ptr) + #'(pointer->bstruct <type> ptr 1))))) + +;; Wrap a bytevector in a bstruct. +(define-syntax bstruct-wrap + (lambda (stx) + (syntax-case stx () + ((_ <type> bv offset n) + (non-opaque-bstruct-descriptor-identifier? #'<type>) + #'(make-bstruct <type> bv offset n)) + ((_ <type> bv offset) + #'(bstruct-wrap <type> bv offset 1))))) + +;; Unwrap a bstruct to a bytevector + offset + count. +(define-syntax bstruct-unwrap + (lambda (stx) + (syntax-case stx () + ((_ <type> bs) + (bstruct-descriptor-identifier? #'<type>) + #`(match bs + (($ <type> bv offset n) + (values bv offset n))))))) + +;; Size/align queries. +(define-syntax bstruct-sizeof + (lambda (stx) + (syntax-case stx () + ((_ <type>) + (non-opaque-bstruct-descriptor-identifier? #'<type>) + (bstruct-descriptor-identifier-size #'<type>))))) + +(define-syntax bstruct-alignof + (lambda (stx) + (syntax-case stx () + ((_ <type>) + (non-opaque-bstruct-descriptor-identifier? #'<type>) + (bstruct-descriptor-identifier-alignment #'<type>))))) + +;; 'bstruct-pack!' and 'bstruct-unpack' allow for directly +;; interpreting bytevector contents as structured data. +(define-syntax bstruct-pack! + (lambda (stx) + (define (flatten-elems stx) + (append-map (lambda (stx) + (syntax-case stx (->) + ((-> root-elem sub-elems ...) + (identifier? #'root-elem) + (map (lambda (stx) + (syntax-case stx () + (((sub-elem ...) val) + #'((root-elem sub-elem ...) val)))) + (flatten-elems #'(sub-elems ...)))) + (((elem ...) val) + #'(((elem ...) val))) + ((elem val) + #'(((elem) val))))) + stx)) + (syntax-case stx () + ((_ <type> bv i elem ...) + (non-opaque-bstruct-descriptor-identifier? #'<type>) + (with-syntax (((((elem ...) val) ...) + (flatten-elems #'(elem ...)))) + #'(begin + ((<type> setter elem ...) bv i val) + ...)))))) +(define-syntax bstruct-unpack + (lambda (stx) + (syntax-case stx () + ((_ <type> bv i elem ...) + (non-opaque-bstruct-descriptor-identifier? #'<type>) + #`(values + #,@(map (lambda (elem) + (syntax-case elem () + ((e ...) + #'((<type> getter e ...) bv i)) + (e + #'((<type> getter e) bv i)))) + #'(elem ...))))))) + +;; Allocate a fresh bstruct that wraps a fresh bytevector big +;; enough to store the entire structure. +(define-syntax bstruct-alloc + (syntax-rules () + ((_ (<type> n) (i elem ...) ...) + (let* ((size (bstruct-sizeof <type>)) + (len (* size n)) + (bv (make-bytevector len 0))) + (bstruct-pack! <type> bv (* size i) elem ...) + ... + (bstruct-wrap <type> bv 0))) + ((_ <type> elem ...) + (bstruct-alloc (<type> 1) (0 elem ...))))) + +;; Return the value of some elements. +(define-syntax bstruct-ref + (syntax-rules () + ((_ (<type> i) bs elem ...) ; array + (match bs + (($ <type> bv (? u64? offset) (? u64? n)) + (assert (u64? i) 'bstruct-ref) + (check-size i n 'bstruct-ref) + (let ((offset (+ offset (* (bstruct-sizeof <type>) i)))) + (bstruct-unpack <type> bv offset elem ...))))) + ((_ <type> bs elem ...) + (bstruct-ref (<type> 0) bs elem ...)))) + +;; Set the value of some elements. +(define-syntax bstruct-set! + (syntax-rules () + ((_ (<type> i) bs elem ...) ; array + (match bs + (($ <type> bv (? u64? offset) (? u64? n)) + (assert (u64? i) 'bstruct-set!) + (check-size i n 'bstruct-set!) + (let ((offset (+ offset (* (bstruct-sizeof <type>) i)))) + (bstruct-pack! <type> bv offset elem ...))))) + ((_ <type> bs elem ...) + (bstruct-set! (<type> 0) bs elem ...)))) + +;; Imperative/functional struct copying. +(define-syntax-rule (bstruct-copy! <type> src dst) + (match src + (($ <type> src-bv (? u64? src-offset) (? u64? src-n)) + (match dst + (($ <type> dst-bv (? u64? dst-offset) (? u64? dst-n)) + (check-size src-n (1+ dst-n) 'bstruct-copy!) + (bytevector-copy! src-bv src-offset + dst-bv dst-offset + (* (bstruct-sizeof <type>) src-n))))))) +(define-syntax-rule (bstruct-copy <type> src) + (match src + (($ <type> _ _ (? u64? n)) + (let ((dst (bstruct-alloc (<type> n)))) + (bstruct-copy! <type> src dst) + dst)))) + +(define (identifier-eq? stx sym) + (and (identifier? stx) (eq? (syntax->datum stx) sym))) + +;; The big gnarly procedural macro! Buckle up! +(define-syntax define-bstruct + (lambda (stx) + (define primitives + '(u8 s8 u16 s16 u32 s32 u64 s64 f32 f64 + int uint long ulong short ushort + size_t ssize_t ptrdiff_t intptr_t uintptr_t)) + (define (target-endianness? e) + (eq? e (target-endianness))) + (define (resolve-endianness e) + (match e + ('native (target-endianness)) + ('non-native (non-target-endianness)) + (_ e))) + (define (identifier-memq? stx syms) + (and (identifier? stx) (memq (syntax->datum stx) syms))) + ;; Primitive getter/setter helpers + (define (ref/endianness proc endianness) + #`(lambda (bv i) + (#,proc bv i #,endianness))) + (define (set!/endianness proc endianness) + #`(lambda (bv i x) + (#,proc bv i x #,endianness))) + (define (uint-ref size endianness) + #`(lambda (bv i) + (bytevector-uint-ref bv i '#,(datum->syntax #f endianness) #,size))) + (define (uint-set! size endianness) + #`(lambda (bv i x) + (bytevector-uint-set! bv i x '#,(datum->syntax #f endianness) #,size))) + (define (sint-ref size endianness) + #`(lambda (bv i) + (bytevector-sint-ref bv i '#,(datum->syntax #f endianness) #,size))) + (define (sint-set! size endianness) + #`(lambda (bv i x) + (bytevector-sint-set! bv i x '#,(datum->syntax #f endianness) #,size))) + ;; Scalar types are divided into two categories: machine indepenent + ;; and machine dependent. The machine independent types (i32, f32, + ;; etc.) have a known size and alignment on all platforms. The + ;; machine dependent types have a size and alignment that can vary + ;; depending on the ABI of the system that is compiling the code. + (define (abi-ref type e) ; e for endianness + (match type + ('uint (uint-ref (sizeof* 'uint) e)) + ('int (sint-ref (sizeof* 'int) e)) + ('ulong (uint-ref (sizeof* 'ulong) e)) + ('long (sint-ref (sizeof* 'long) e)) + ('ushort (uint-ref (sizeof* 'ushort) e)) + ('short (sint-ref (sizeof* 'short) e)) + ('size_t (uint-ref (sizeof* 'size_t) e)) + ('ssize_t (sint-ref (sizeof* 'ssize_t) e)) + ('ptrdiff_t (sint-ref (sizeof* 'ptrdiff_t) e)) + ('intptr_t (sint-ref (sizeof* 'intptr_t) e)) + ('uintptr_t (uint-ref (sizeof* 'uintptr_t) e)))) + (define (abi-set! type e) + (match type + ('uint (uint-set! (sizeof* 'uint) e)) + ('int (sint-set! (sizeof* 'int) e)) + ('ulong (uint-set! (sizeof* 'ulong) e)) + ('long (sint-set! (sizeof* 'long) e)) + ('ushort (uint-set! (sizeof* 'ushort) e)) + ('short (sint-set! (sizeof* 'short) e)) + ('size_t (uint-set! (sizeof* 'size_t) e)) + ('ssize_t (sint-set! (sizeof* 'ssize_t) e)) + ('ptrdiff_t (sint-set! (sizeof* 'ptrdiff_t) e)) + ('intptr_t (sint-set! (sizeof* 'intptr_t) e)) + ('uintptr_t (uint-set! (sizeof* 'uintptr_t) e)))) + (define (primitive-getter size native? type) + (if native? + (match type + ('u8 #'bytevector-u8-ref) + ('s8 #'bytevector-s8-ref) + ('u16 #'bytevector-u16-native-ref) + ('s16 #'bytevector-s16-native-ref) + ('u32 #'bytevector-u32-native-ref) + ('s32 #'bytevector-s32-native-ref) + ('u64 #'bytevector-u64-native-ref) + ('s64 #'bytevector-s64-native-ref) + ('f32 #'bytevector-ieee-single-native-ref) + ('f64 #'bytevector-ieee-double-native-ref) + (_ (abi-ref type (target-endianness)))) + (let ((e (non-target-endianness))) + (match type + ('u8 #'bytevector-u8-ref) + ('s8 #'bytevector-s8-ref) + ('u16 (ref/endianness #'bytevector-u16-ref e)) + ('s16 (ref/endianness #'bytevector-s16-ref e)) + ('u32 (ref/endianness #'bytevector-u32-ref e)) + ('s32 (ref/endianness #'bytevector-s32-ref e)) + ('u64 (ref/endianness #'bytevector-u64-ref e)) + ('s64 (ref/endianness #'bytevector-s64-ref e)) + ('f32 (ref/endianness #'bytevector-ieee-single-ref e)) + ('f64 (ref/endianness #'bytevector-ieee-double-ref e)) + (_ (abi-ref type e)))))) + (define (primitive-setter size native? type) + (if native? + (match type + ('u8 #'bytevector-u8-set!) + ('s8 #'bytevector-s8-set!) + ('u16 #'bytevector-u16-native-set!) + ('s16 #'bytevector-s16-native-set!) + ('u32 #'bytevector-u32-native-set!) + ('s32 #'bytevector-s32-native-set!) + ('u64 #'bytevector-u64-native-set!) + ('s64 #'bytevector-s64-native-set!) + ('f32 #'bytevector-ieee-single-native-set!) + ('f64 #'bytevector-ieee-double-native-set!) + (_ (abi-set! type (target-endianness)))) + (let ((e (non-target-endianness))) + (match type + ('u8 #'bytevector-u8-set!) + ('s8 #'bytevector-s8-set!) + ('u16 (set!/endianness #'bytevector-u16-set! e)) + ('s16 (set!/endianness #'bytevector-s16-set! e)) + ('u32 (set!/endianness #'bytevector-u32-set! e)) + ('s32 (set!/endianness #'bytevector-s32-set! e)) + ('u64 (set!/endianness #'bytevector-u64-set! e)) + ('s64 (set!/endianness #'bytevector-s64-set! e)) + ('f32 (set!/endianness #'bytevector-ieee-single-set! e)) + ('f64 (set!/endianness #'bytevector-ieee-double-set! e)) + (_ (abi-set! type e)))))) + ;; Types can be recursive by referencing a type name within the + ;; same type group in a pointer expression. + ;; + ;; For example: + ;; + ;; (define-bstruct linked-list + ;; (struct (item int) (next (* linked-list)))) + ;; + ;; To make this work, we keep a side table mapping type names to + ;; pointer type accessor forms that need to be patched with a + ;; cyclical reference *after* all the types are defined. + (define recursive-pointers (make-hash-table)) + (define (sizeof type) + (match type + ((or ('scalar size _ _ _) + ('struct size _ _) + ('union size _ _) + ('array size _ _ _) + ('pointer size _ _)) + size) + (('opaque) 0) + ((? bstruct-descriptor-identifier?) + (bstruct-descriptor-identifier-size type)))) + (define (alignof type) + (match type + ((or ('scalar _ align _ _) + ('struct _ align _) + ('union _ align _) + ('array _ align _ _) + ('pointer _ align _)) + align) + (('opaque) 0) + ((? bstruct-descriptor-identifier?) + (bstruct-descriptor-identifier-alignment type)))) + (define (compute-type expr accessor type-ids packed? endianness) + (syntax-case expr () + ;; Modifiers + ((packed expr) + (identifier-eq? #'packed 'packed) + (compute-type #'expr accessor type-ids #t endianness)) + ((unpacked expr) + (identifier-eq? #'unpacked 'unpacked) + (compute-type #'expr accessor type-ids #f endianness)) + ((endian e expr) + (and (identifier-eq? #'endian 'endian) + (identifier-memq? #'e '(native non-native big little))) + (compute-type #'expr accessor type-ids packed? + (resolve-endianness (syntax->datum #'e)))) + ;; Previously defined types. 'packed?' and 'endianness' do + ;; not apply here. + (type-id + (bstruct-descriptor-identifier? #'type-id) + #'type-id) + ;; Primitive scalars + (primitive + (identifier-memq? #'primitive primitives) + (let ((type (syntax->datum #'primitive))) + `(scalar ,(sizeof* type) + ,(alignof* type) + ,(target-endianness? endianness) + ,type))) + ;; Structs + ((struct (field-name field-expr) ...) + (and (identifier-eq? #'struct 'struct) + (not (null? #'(field-name ...))) + (every identifier? #'(field-name ...))) + (let loop ((field-exprs #'((field-name field-expr) ...)) + (fields '()) (offset 0) (alignment 0)) + (syntax-case field-exprs () + (() + ;; Round up to a multiple of alignment to get final + ;; size. + (let ((size (* (ceiling (/ offset alignment)) alignment))) + `(struct ,size ,alignment ,(reverse fields)))) + ;; An underscore indicates a pseudo-field that is just + ;; for padding. It is not included in the struct field + ;; list and just adds to the offset. + (((underscore expr) . rest-exprs) + (identifier-eq? #'underscore '_) + (let ((type (compute-type #'expr #f type-ids packed? endianness))) + (loop #'rest-exprs fields (+ offset (sizeof type)) alignment))) + (((name expr) . rest-exprs) + (let* ((type (compute-type #'expr + #`(struct-field-ref #,accessor 'name) + type-ids packed? endianness)) + (field-alignment (alignof type)) + (padding (if packed? + 0 + (modulo (- field-alignment + (modulo offset field-alignment)) + field-alignment))) + (offset (+ offset padding)) + (alignment (max alignment field-alignment))) + (loop #'rest-exprs + (cons (list (syntax->datum #'name) offset type) fields) + (+ offset (sizeof type)) + alignment)))))) + ;; Unions + ((union (field-name field-expr) ...) + (and (identifier-eq? #'union 'union) + (not (null? #'(field-name ...))) + (every identifier? #'(field-name ...))) + (let loop ((field-exprs #'((field-name field-expr) ...)) + (fields '()) (size 0) (alignment 0)) + (syntax-case field-exprs () + (() + `(union ,size ,alignment ,(reverse fields))) + (((underscore expr) . rest-exprs) + (identifier-eq? #'underscore '_) + (let ((type (compute-type #'expr #f type-ids packed? endianness))) + (loop #'rest-exprs fields (max size (sizeof type)) alignment))) + (((name expr) . rest-exprs) + (let ((type (compute-type #'expr + #`(struct-field-ref #,accessor 'name) + type-ids packed? endianness))) + (loop #'rest-exprs + (cons (list (syntax->datum #'name) type) fields) + (max size (sizeof type)) + (max alignment (alignof type)))))))) + ;; Arrays + ((array length expr) + (and (identifier-eq? #'array 'array) + (exact-integer? (syntax->datum #'length)) + (positive? (syntax->datum #'length))) + (let ((length (syntax->datum #'length)) + (type (compute-type #'expr #`(array-type #,accessor) + type-ids packed? endianness))) + `(array ,(* (sizeof type) length) ,(alignof type) ,length ,type))) + ;; Pointers + ((pointer expr) + (identifier-eq? #'pointer '*) + (let ((size (ffi:sizeof '*)) + (align (ffi:alignof '*))) + (let loop ((expr #'expr)) + (syntax-case expr () + ;; Void pointer + (void + (identifier-eq? #'void 'void) + `(pointer ,size ,align void)) + ;; Primitive pointer + (prim + (identifier-memq? #'prim primitives) + `(pointer ,size ,align + ,(compute-type #'prim #f type-ids packed? endianness))) + ;; Pointer to a pointer + ((pointer expr) + (identifier-eq? #'pointer '*) + `(pointer ,size ,align ,(loop #'expr))) + ;; Recursive reference to a type within this type group. + (type-id + (any (lambda (id) (bound-identifier=? #'type-id id)) type-ids) + (let ((pointer `(pointer ,size ,align (recur ,#'type-id)))) + ;; Make a note that the recursive reference needs to be + ;; made via mutation after all types in the group are + ;; defined. + (hashq-set! recursive-pointers + (syntax->datum #'type-id) + (cons accessor + (hashq-ref recursive-pointers + (syntax->datum #'type-id) + '()))) + pointer)) + ;; Reference to a type outside of this type group. + (type-id + (bstruct-descriptor-identifier? #'type-id) + `(pointer ,size ,align ,#'type-id)))))) + ;; Opaque types + (opaque + (identifier-eq? #'opaque 'opaque) + '(opaque)))) + (define (type->syntax type) + (match type + ((? identifier? type) + type) + (('scalar size alignment native? type) + #`(make-scalar #,size #,alignment #,native? '#,(datum->syntax #f type))) + (('struct size alignment fields) + (let ((fields* (map (match-lambda + ((name offset type) + #`(make-struct-field '#,(datum->syntax #f name) + #,offset + #,(type->syntax type)))) + fields))) + #`(make-struct #,size #,alignment (list #,@fields*)))) + (('union size alignment fields) + (let ((fields* (map (match-lambda + ((name type) + #`(make-union-field '#,(datum->syntax #f name) + #,(type->syntax type)))) + fields))) + #`(make-union #,size #,alignment (list #,@fields*)))) + (('array size alignment length type) + #`(make-array #,size #,alignment #,length #,(type->syntax type))) + (('pointer size alignment ('recur type)) + #`(make-pointer #,size #,alignment #f)) + (('pointer size alignment 'void) + #`(make-pointer #,size #,alignment #f)) + (('pointer size alignment type) + #`(make-pointer #,size #,alignment #,(type->syntax type))) + (('opaque) + #'(make-opaque)))) + (define (expand-accessor proc stx id op type group) + #`(syntax-case #,stx () + (() #'#,(proc id 0)) ; self reference + ((elem :::) + #,(let loop ((stx #'#'(elem :::)) (type type) (offset 0)) + (match type + ((? bstruct-descriptor-identifier? type) + ;; Recursively invoke macro for referenced type to produce + ;; the accessor. + #`(syntax-case #,stx () + ((elem :::) + #'#,(proc #`(#,type #,(datum->syntax #f op) elem :::) + offset)))) + (('scalar _ _ _ _) + #`(syntax-case #,stx () + (() #'#,(proc type offset)))) + (('struct _ _ fields) + #`(syntax-case #,stx () + ((e elem :::) + (match (syntax->datum #'e) + #,@(map (match-lambda + ((name offset* type) + #`('#,(datum->syntax #f name) + #,(loop #'#'(elem :::) type + #`(+ #,offset #,offset*))))) + fields) + (_ #'(error "no such struct field" 'e)))))) + (('union _ _ fields) + #`(syntax-case #,stx () + ((e elem :::) + (match (syntax->datum #'e) + #,@(map (match-lambda + ((name type) + #`('#,(datum->syntax #f name) + #,(loop #'#'(elem :::) type offset)))) + fields) + (_ #'(error "no such union field" 'e)))))) + (('array _ _ length type) + ;; Need to generate a unique name here to capture + ;; the 'e' containing the array index expression as + ;; 'e' could get shadowed later. + (with-syntax (((i) (generate-temporaries '(i)))) + #`(syntax-case #,stx () + ((e elem :::) + (with-syntax ((i #'e)) + #,(loop #'#'(elem :::) + type + #`(+ #,offset + (* (let () + ;; if 'i' is a constant then + ;; these checks will be elided + ;; by the compiler. + (assert (u64? i) 'bstruct-accessor) + (assert (< -1 i #,length) + 'bstruct-accessor) + i) + #,(sizeof type))))))))) + ;; Void pointers can only be referenced, not + ;; dereferenced. + (('pointer _ _ 'void) + #`(syntax-case #,stx () + (() #'#,(proc type offset)))) + (('pointer _ _ type*) + (call-with-values (lambda () + (match type* + (('recur type*) + (values type* #t)) + (type* + (values type* #f)))) + (lambda (type* recur?) + #`(let () + (define (expand-pointer-accessor stx) + (syntax-case stx () + ;; Pointer reference + (() #'#,(proc type offset)) + ;; Pointer dereference. Sigh, it's + ;; complicated... + (((* index) elem :::) + (identifier-eq? #'* '*) + (let ((offset #,offset) + (e '#,(datum->syntax #f (target-endianness))) + (ptr-size #,(target-word-size)) + ;; For recursive types, we don't + ;; yet have a defined macro to + ;; query for the size, so we have + ;; to look it up in the group + ;; alist. + (size #,(sizeof (if recur? + (assoc-ref group type*) + type*))) + (body #,(match type* + ((? syntax?) + #`#'#,(proc #`(#,type* + #,(datum->syntax #f op) + elem :::) + 0)) + (_ + (loop #'#'(elem :::) type* 0))))) + ;; 'bv' and 'i' are the lexical + ;; variables containing the + ;; bytevector and offset for + ;; getting/setting. Every time we + ;; encounter a pointer dereference we + ;; need to shadow the old variables + ;; with new ones. + #`(let* ((e '#,(datum->syntax #f e)) + (base (bytevector-sint-ref bv (+ i #,offset) + e #,ptr-size)) + (address (+ base (* #,size index))) + (ptr (ffi:make-pointer address)) + (bv (ffi:pointer->bytevector ptr #,size)) + (i 0)) + #,body))) + ;; Pointer dereference with implied + ;; index of 0. + ((* elem :::) + (identifier-eq? #'* '*) + (expand-pointer-accessor #'((* 0) elem :::))))) + (expand-pointer-accessor #,stx))))) + (('opaque) + #'(error "cannot access opaque type"))))))) + (define (macro-for-type id type-id type group) + (define (self-identifier? other) + (eq? id other)) + (let ((self-size (sizeof type))) + #`(define-syntax #,id + (with-ellipsis ::: + (lambda (stx) + ;; Since syntax transformers are procedures, we can + ;; stash useful information in procedure properties that + ;; 'define-bstruct' can use if/when a bstruct type + ;; is referenced within another type definition. + #((bstruct? . #t) + (bstruct-opaque? . #,(eq? type '(opaque))) + (bstruct-size . #,(sizeof type)) + (bstruct-alignment . #,(alignof type))) + (syntax-case stx () + ;; Identifier syntax to provide the illusion that this + ;; macro is just an ordinary variable. + (self + (identifier? #'self) + #'#,type-id) + ;; Private interface for code generation. + ((_ offset elem :::) + (identifier-eq? #'offset 'offset) + #`(lambda (bv i) + #,#,(expand-accessor + (lambda (type offset) + (match type + ((and sub-offset ((? syntax?) ...)) + #`(#,sub-offset bv (+ i #,offset))) + (_ #`(values bv #,offset)))) + #'#'(elem :::) id 'offset type group))) + ((_ getter elem :::) + (identifier-eq? #'getter 'getter) + #`(lambda (bv i) + #,#,(expand-accessor + (lambda (type offset) + (match type + ((or (? self-identifier?) + (? bstruct-descriptor-identifier?)) + #`(make-bstruct #,type bv (+ i #,offset))) + ((and sub-get ((? syntax?) ...)) + #`(#,sub-get bv (+ i #,offset))) + (('pointer _ _ 'void) + #`(ffi:make-pointer + (bytevector-sint-ref bv (+ i #,offset) + '#,(datum->syntax + #f (target-endianness)) + #,(target-word-size)))) + (('pointer size _ (or ('recur type) type)) + #`(ffi:make-pointer + (bytevector-sint-ref bv (+ i #,offset) + '#,(datum->syntax + #f (target-endianness)) + #,(target-word-size)))) + (('scalar size align native? type) + #`(#,(primitive-getter size native? type) + bv (+ i #,offset))))) + #'#'(elem :::) id 'getter type group))) + ((_ setter elem :::) + (identifier-eq? #'setter 'setter) + #`(lambda (bv i x) + #,#,(expand-accessor + (lambda (type offset) + (match type + ((? self-identifier?) + #`(match x + (($ #,type src j) + (bytevector-copy! src j bv + (+ i #,offset) + #,self-size)))) + ((? bstruct-descriptor-identifier?) + #`(match x + (($ #,type src j) + (bytevector-copy! src j bv + (+ i #,offset) + #,(sizeof type))))) + ((and sub-set! ((? syntax?) ...)) + #`(#,sub-set! bv (+ i #,offset) x)) + (('pointer size _ 'void) + #`(bytevector-sint-set! bv (+ i #,offset) + (ffi:pointer-address x) + '#(datum->syntax + #f (target-endianness)) + #,(target-word-size))) + (('pointer size _ (or ('recur type) type)) + #`(bytevector-sint-set! bv (+ i #,offset) + (ffi:pointer-address x) + '#(datum->syntax + #f (target-endianness)) + #,(target-word-size))) + (('scalar size align native? type) + (let ((setter (primitive-setter size native? type))) + #`(#,setter bv (+ i #,offset) x))))) + #'#'(elem :::) id 'setter type group))))))))) + (define (type-descriptor-id id) + (datum->syntax id + (symbol-append (string->symbol "% bstruct-descriptor-") + (syntax->datum id)))) + (define (recursive-pointer-accessors id) + (hashq-ref recursive-pointers (syntax->datum id) '())) + (syntax-case stx () + ;; Type group definition. Types defined in the same group can + ;; contain recursive pointer references to each other. + ((_ (id expr . kwargs) ...) + (not (null? #'(id ...))) + (let* ((types (map (lambda (id* expr) + (compute-type expr + #`(bstruct-descriptor-type #,id*) + #'(id ...) #f (target-endianness))) + #'(id ...) + #'(expr ...))) + (group (map cons #'(id ...) types))) + (with-syntax (((type-id ...) (map type-descriptor-id #'(id ...))) + ((type-stx ...) (map type->syntax types)) + (((accessor ...) ...) (map recursive-pointer-accessors #'(id ...)))) + (with-syntax (((macros ...) + (map (lambda (id type-id type) + (macro-for-type id type-id type group)) + #'(id ...) #'(type-id ...) types))) + #`(begin + ;; First, define the descriptors using gensym'd names. + (define type-id + (make-bstruct-descriptor 'id type-stx . kwargs)) + ... + ;; Then tie the knot for recursive pointer types. + (set-pointer-types id accessor ...) ... + ;; Finally, define macros using the true names that + ;; wrap the gensym'd variables. + macros ...))))) + ;; A single type definition is a type group of one. + ((_ id . args) + #'(define-bstruct (id . args)))))) + +(define-syntax-rule (set-pointer-types id accessor ...) + (begin + (set-pointer-type! accessor id) ...)) |