From 1a375482bf71287801fb4258c6387d92236b6fab Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 10 Feb 2016 22:54:07 -0500 Subject: math: transform: Perform matrix multiplication in Scheme. No more GSL dependency! Thanks Andy Wingo, for unboxed floating point operations! * sly/math/transform.scm (matrix-ref, matrix-set!): Allow inlining. (transform*!): Reimplement in pure Scheme, no FFI call to GSL. --- sly/math/transform.scm | 144 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 136 insertions(+), 8 deletions(-) diff --git a/sly/math/transform.scm b/sly/math/transform.scm index 31881e1..38fd464 100644 --- a/sly/math/transform.scm +++ b/sly/math/transform.scm @@ -31,7 +31,6 @@ #:use-module (sly math) #:use-module (sly math vector) #:use-module (sly math quaternion) - #:use-module (sly wrappers gsl) #:export (make-transform null-transform identity-transform transform? transform-matrix transpose transform-vector2 @@ -52,10 +51,10 @@ (define (make-4x4-matrix) (make-f32vector 16)) -(define (matrix-set! matrix row column x) +(define-inlinable (matrix-set! matrix row column x) (f32vector-set! matrix (+ (* row 4) column) x)) -(define (matrix-ref matrix row column) +(define-inlinable (matrix-ref matrix row column) (f32vector-ref matrix (+ (* row 4) column))) (define (make-transform aa ab ac ad @@ -151,12 +150,141 @@ called without any arguments." result)) (reduce mul identity-transform transforms)) +;; This could be described as a loop, but the result would be much +;; slower matrix multiplication. To enable Guile's optimizer to unbox +;; floating point ops and reduce reads/writes, each matrix element is +;; bound as a local, the loop is completely unrolled, and matrix-ref +;; and matrix-set! are inlined. (define (transform*! dest a b) - (cblas-sgemm cblas-row-major cblas-no-trans cblas-no-trans - 4 4 4 1 - (transform->pointer a) 4 - (transform->pointer b) 4 - 0 (transform->pointer dest) 4)) + (let ((m1 (transform-matrix a)) + (m2 (transform-matrix b)) + (m3 (transform-matrix dest))) + (let ((m1-0-0 (matrix-ref m1 0 0)) + (m1-0-1 (matrix-ref m1 0 1)) + (m1-0-2 (matrix-ref m1 0 2)) + (m1-0-3 (matrix-ref m1 0 3)) + (m1-1-0 (matrix-ref m1 1 0)) + (m1-1-1 (matrix-ref m1 1 1)) + (m1-1-2 (matrix-ref m1 1 2)) + (m1-1-3 (matrix-ref m1 1 3)) + (m1-2-0 (matrix-ref m1 2 0)) + (m1-2-1 (matrix-ref m1 2 1)) + (m1-2-2 (matrix-ref m1 2 2)) + (m1-2-3 (matrix-ref m1 2 3)) + (m1-3-0 (matrix-ref m1 3 0)) + (m1-3-1 (matrix-ref m1 3 1)) + (m1-3-2 (matrix-ref m1 3 2)) + (m1-3-3 (matrix-ref m1 3 3)) + (m2-0-0 (matrix-ref m2 0 0)) + (m2-0-1 (matrix-ref m2 0 1)) + (m2-0-2 (matrix-ref m2 0 2)) + (m2-0-3 (matrix-ref m2 0 3)) + (m2-1-0 (matrix-ref m2 1 0)) + (m2-1-1 (matrix-ref m2 1 1)) + (m2-1-2 (matrix-ref m2 1 2)) + (m2-1-3 (matrix-ref m2 1 3)) + (m2-2-0 (matrix-ref m2 2 0)) + (m2-2-1 (matrix-ref m2 2 1)) + (m2-2-2 (matrix-ref m2 2 2)) + (m2-2-3 (matrix-ref m2 2 3)) + (m2-3-0 (matrix-ref m2 3 0)) + (m2-3-1 (matrix-ref m2 3 1)) + (m2-3-2 (matrix-ref m2 3 2)) + (m2-3-3 (matrix-ref m2 3 3))) + + (matrix-set! m3 0 0 + (+ (* m1-0-0 m2-0-0) + (* m1-0-1 m2-1-0) + (* m1-0-2 m2-2-0) + (* m1-0-3 m2-3-0))) + (matrix-set! m3 0 1 + (+ (* m1-0-0 m2-0-1) + (* m1-0-1 m2-1-1) + (* m1-0-2 m2-2-1) + (* m1-0-3 m2-3-1))) + (matrix-set! m3 0 2 + (+ (* m1-0-0 m2-0-2) + (* m1-0-1 m2-1-2) + (* m1-0-2 m2-2-2) + (* m1-0-3 m2-3-2))) + (matrix-set! m3 0 3 + (+ (* m1-0-0 m2-0-3) + (* m1-0-1 m2-1-3) + (* m1-0-2 m2-2-3) + (* m1-0-3 m2-3-3))) + (matrix-set! m3 1 0 + (+ (* m1-1-0 m2-0-0) + (* m1-1-1 m2-1-0) + (* m1-1-2 m2-2-0) + (* m1-1-3 m2-3-0))) + (matrix-set! m3 1 1 + (+ (* m1-1-0 m2-0-1) + (* m1-1-1 m2-1-1) + (* m1-1-2 m2-2-1) + (* m1-1-3 m2-3-1))) + (matrix-set! m3 1 2 + (+ (* m1-1-0 m2-0-2) + (* m1-1-1 m2-1-2) + (* m1-1-2 m2-2-2) + (* m1-1-3 m2-3-2))) + (matrix-set! m3 1 3 + (+ (* m1-1-0 m2-0-3) + (* m1-1-1 m2-1-3) + (* m1-1-2 m2-2-3) + (* m1-1-3 m2-3-3))) + (matrix-set! m3 2 0 + (+ (* m1-2-0 m2-0-0) + (* m1-2-1 m2-1-0) + (* m1-2-2 m2-2-0) + (* m1-2-3 m2-3-0))) + (matrix-set! m3 2 1 + (+ (* m1-2-0 m2-0-1) + (* m1-2-1 m2-1-1) + (* m1-2-2 m2-2-1) + (* m1-2-3 m2-3-1))) + (matrix-set! m3 2 2 + (+ (* m1-2-0 m2-0-2) + (* m1-2-1 m2-1-2) + (* m1-2-2 m2-2-2) + (* m1-2-3 m2-3-2))) + (matrix-set! m3 2 3 + (+ (* m1-2-0 m2-0-3) + (* m1-2-1 m2-1-3) + (* m1-2-2 m2-2-3) + (* m1-2-3 m2-3-3))) + (matrix-set! m3 3 0 + (+ (* m1-3-0 m2-0-0) + (* m1-3-1 m2-1-0) + (* m1-3-2 m2-2-0) + (* m1-3-3 m2-3-0))) + (matrix-set! m3 3 1 + (+ (* m1-3-0 m2-0-1) + (* m1-3-1 m2-1-1) + (* m1-3-2 m2-2-1) + (* m1-3-3 m2-3-1))) + (matrix-set! m3 3 2 + (+ (* m1-3-0 m2-0-2) + (* m1-3-1 m2-1-2) + (* m1-3-2 m2-2-2) + (* m1-3-3 m2-3-2))) + (matrix-set! m3 3 3 + (+ (* m1-3-0 m2-0-3) + (* m1-3-1 m2-1-3) + (* m1-3-2 m2-2-3) + (* m1-3-3 m2-3-3)))))) + +(define (translate! t . args) + (match args + (($ x y) + (make-transform 1 0 0 0 + 0 1 0 0 + 0 0 1 0 + x y 0 1)) + (($ x y z) + (make-transform 1 0 0 0 + 0 1 0 0 + 0 0 1 0 + x y z 1)))) (define translate (match-lambda -- cgit v1.2.3