From 9148240481fd0c13bed3473748a416e21d6c1a70 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Sat, 9 Nov 2024 10:17:15 -0500 Subject: Rewrite base64 module and add an encoder. --- chickadee/base64.scm | 164 +++++++++++++++++++++++++++++++++++++-------------- tests/base64.scm | 24 ++++++-- 2 files changed, 139 insertions(+), 49 deletions(-) diff --git a/chickadee/base64.scm b/chickadee/base64.scm index 74fd0c5..89fc4bb 100644 --- a/chickadee/base64.scm +++ b/chickadee/base64.scm @@ -1,5 +1,5 @@ ;;; Chickadee Game Toolkit -;;; Copyright © 2023 David Thompson +;;; Copyright © 2024 David Thompson ;;; ;;; Licensed under the Apache License, Version 2.0 (the "License"); ;;; you may not use this file except in compliance with the License. @@ -14,52 +14,126 @@ ;;; limitations under the License. (define-module (chickadee base64) + #:use-module (ice-9 binary-ports) + #:use-module (ice-9 textual-ports) #:use-module (rnrs bytevectors) - #:export (base64-decode)) + #:export (base64-alphabet + base64-url-alphabet + base64-decode + base64-encode)) -(define %base64-chars +(define base64-alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") +(define base64-url-alphabet + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_") -(define %decode-table - (let ((table (make-hash-table))) - (let loop ((i 0)) - (when (< i 64) - (hashv-set! table (string-ref %base64-chars i) i) - (loop (+ i 1)))) - (hashv-set! table #\= 0) - table)) +(define (alphabet-for-each proc alphabet) + (if (= (string-length alphabet) 64) + (let lp ((i 0) (chars (string->list alphabet))) + (when (< i 64) + (let* ((char (car chars)) + (x (char->integer char))) + ;; FIXME?: Only characters in the + ;; ASCII range are supported. + (unless (< x 128) + (error "invalid base64 alphabet character" char)) + (proc i x) + (lp (1+ i) (cdr chars))))) + (error "invalid base64 alphabet" alphabet))) -(define (base64-decode str) - "Decode the base64 encoded string STR and return a bytevector -containing the decoded data." - (let* ((n (string-length str)) - (groups (/ n 4))) - (define (padding? i) - (eqv? (string-ref str i) #\=)) - (define (decode i) - (let ((c (string-ref str i))) - (or (hashv-ref %decode-table c) - (error 'base64-error "invalid base64 character" c)))) - (define (decode4 group) - (let ((start (* group 4))) - (logior (ash (decode start) 18) - (ash (decode (+ start 1)) 12) - (ash (decode (+ start 2)) 6) - (decode (+ start 3))))) - (unless (exact-integer? groups) - (error 'base64-error "invalid character count")) - (let* ((padding (cond - ((and (padding? (- n 2)) (padding? (- n 1))) 2) - ((padding? (- n 1)) 1) - (else 0))) - (bv (make-bytevector (- (* groups 3) padding)))) - (let loop ((i 0)) - (cond - ((= i (- groups 1)) - (let ((x (ash (decode4 i) (* padding -8)))) - (bytevector-sint-set! bv (* i 3) x (endianness big) (- 3 padding)) - bv)) - ((< i groups) - (bytevector-sint-set! bv (* i 3) (decode4 i) (endianness big) 3) - (loop (+ i 1))))) - bv))) +(define alphabet->decoder + (let ((cache (make-hash-table))) + (lambda (alphabet) + (or (hash-ref cache alphabet) + (let ((bv (make-bytevector 128))) + (alphabet-for-each (lambda (i x) + (bytevector-u8-set! bv x i)) + alphabet) + (hash-set! cache alphabet bv) + bv))))) + +(define* (base64-decode str #:optional (alphabet base64-alphabet)) + "Decode the base64 encoded string @var{str} using @var{alphabet} and +return a bytevector containing the decoded data." + (define decoder (alphabet->decoder alphabet)) + (define (decode char) (bytevector-u8-ref decoder (char->integer char))) + (call-with-input-string str + (lambda (in) + (call-with-output-bytevector + (lambda (out) + (let lp () + (unless (eof-object? (peek-char in)) + (let* ((a (read-char in)) + (b (read-char in)) + (c (read-char in)) + (d (read-char in))) + (cond + ((eq? d #\=) + (if (eq? c #\=) + ;; 2 bytes of padding. + (let ((x (logior (ash (decode a) 6) (decode b)))) + (put-u8 out (ash (logand x #xff0) -4))) + ;; 1 byte of padding. + (let ((x (logior (ash (decode a) 12) + (ash (decode b) 6) + (decode c)))) + (put-u8 out (ash (logand x #x3fc00) -10)) + (put-u8 out (ash (logand x #x3fc) -2)))) + ;; Input should be fully consumed at this point. + (unless (eof-object? (peek-char in)) + (error "invalid base64 string" str))) + (else + (let ((x (logior (ash (decode a) 18) + (ash (decode b) 12) + (ash (decode c) 6) + (decode d)))) + (put-u8 out (ash (logand x #xff0000) -16)) + (put-u8 out (ash (logand x #x00ff00) -8)) + (put-u8 out (logand x #x0000ff)) + (lp)))))))))))) + +(define alphabet->encoder + (let ((cache (make-hash-table))) + (lambda (alphabet) + (or (hash-ref cache alphabet) + (let ((bv (make-bytevector 64))) + (alphabet-for-each (lambda (i x) + (bytevector-u8-set! bv i x)) + alphabet) + (hash-set! cache alphabet bv) + bv))))) + +(define* (base64-encode bv #:optional (alphabet base64-alphabet)) + "Encode the bytevector @var{bv} to a base64 using +@var{alphabet} and return a string containing the encoded data." + (define encoder (alphabet->encoder alphabet)) + (define len (bytevector-length bv)) + (define groups (quotient len 3)) + (define padding (- 3 (remainder len 3))) + (define (encode x) (integer->char (bytevector-u8-ref encoder x))) + (call-with-output-string + (lambda (port) + (let lp ((i 0)) + (let ((offset (* i 3))) + (cond + ((< i groups) + (let ((x (logior (ash (bytevector-u8-ref bv offset) 16) + (ash (bytevector-u8-ref bv (1+ offset)) 8) + (bytevector-u8-ref bv (+ offset 2))))) + (put-char port (encode (ash (logand x #xfc0000) -18))) + (put-char port (encode (ash (logand x #x03f000) -12))) + (put-char port (encode (ash (logand x #x000fc0) -6))) + (put-char port (encode (logand x #x00003f))) + (lp (1+ i)))) + ((eq? padding 1) + (let ((x (logior (ash (bytevector-u8-ref bv offset) 8) + (bytevector-u8-ref bv (1+ offset))))) + (put-char port (encode (ash (logand x #xfc00) -10))) + (put-char port (encode (ash (logand x #x03f0) -4))) + (put-char port (encode (ash (logand x #x000f) 2))) + (put-char port #\=))) + ((eq? padding 2) + (let ((x (bytevector-u8-ref bv offset))) + (put-char port (encode (ash (logand x #xfc) -2))) + (put-char port (encode (ash (logand x #x03) 4))) + (put-string port "=="))))))))) diff --git a/tests/base64.scm b/tests/base64.scm index dba4f3b..c6ebe8f 100644 --- a/tests/base64.scm +++ b/tests/base64.scm @@ -21,12 +21,28 @@ (with-tests "base64" (test-group "base64-decode" - (test-equal "encoded text no padding" + (test-equal "empty string" + "" + (utf8->string (base64-decode ""))) + (test-equal "no padding" "hello!" (utf8->string (base64-decode "aGVsbG8h"))) - (test-equal "encoded text with one byte of padding" + (test-equal "one byte of padding" "hello" (utf8->string (base64-decode "aGVsbG8="))) - (test-equal "encoded text with two bytes of padding" + (test-equal "two bytes of padding" "what's up?" - (utf8->string (base64-decode "d2hhdCdzIHVwPw=="))))) + (utf8->string (base64-decode "d2hhdCdzIHVwPw==")))) + (test-group "base64-encode" + (test-equal "empty string" + "" + (base64-encode (string->utf8 ""))) + (test-equal "no padding" + "aGVsbG8h" + (base64-encode (string->utf8 "hello!"))) + (test-equal "one byte of padding" + "aGVsbG8=" + (base64-encode (string->utf8 "hello"))) + (test-equal "two bytes of padding" + "d2hhdCdzIHVwPw==" + (base64-encode (string->utf8 "what's up?"))))) -- cgit v1.2.3