summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Thompson <dthompson2@worcester.edu>2024-11-09 10:17:15 -0500
committerDavid Thompson <dthompson2@worcester.edu>2024-11-09 10:17:15 -0500
commit9148240481fd0c13bed3473748a416e21d6c1a70 (patch)
tree143c26925ae2647d3a7b98611c39b0916debab51
parent31b0a07f6a3ea3c37ae39740948f8d6471d8e5a3 (diff)
Rewrite base64 module and add an encoder.
-rw-r--r--chickadee/base64.scm164
-rw-r--r--tests/base64.scm24
2 files changed, 139 insertions, 49 deletions
diff --git a/chickadee/base64.scm b/chickadee/base64.scm
index 74fd0c5..89fc4bb 100644
--- a/chickadee/base64.scm
+++ b/chickadee/base64.scm
@@ -1,5 +1,5 @@
;;; Chickadee Game Toolkit
-;;; Copyright © 2023 David Thompson <dthompson2@worcester.edu>
+;;; Copyright © 2024 David Thompson <dthompson2@worcester.edu>
;;;
;;; Licensed under the Apache License, Version 2.0 (the "License");
;;; you may not use this file except in compliance with the License.
@@ -14,52 +14,126 @@
;;; limitations under the License.
(define-module (chickadee base64)
+ #:use-module (ice-9 binary-ports)
+ #:use-module (ice-9 textual-ports)
#:use-module (rnrs bytevectors)
- #:export (base64-decode))
+ #:export (base64-alphabet
+ base64-url-alphabet
+ base64-decode
+ base64-encode))
-(define %base64-chars
+(define base64-alphabet
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
+(define base64-url-alphabet
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_")
-(define %decode-table
- (let ((table (make-hash-table)))
- (let loop ((i 0))
- (when (< i 64)
- (hashv-set! table (string-ref %base64-chars i) i)
- (loop (+ i 1))))
- (hashv-set! table #\= 0)
- table))
+(define (alphabet-for-each proc alphabet)
+ (if (= (string-length alphabet) 64)
+ (let lp ((i 0) (chars (string->list alphabet)))
+ (when (< i 64)
+ (let* ((char (car chars))
+ (x (char->integer char)))
+ ;; FIXME?: Only characters in the
+ ;; ASCII range are supported.
+ (unless (< x 128)
+ (error "invalid base64 alphabet character" char))
+ (proc i x)
+ (lp (1+ i) (cdr chars)))))
+ (error "invalid base64 alphabet" alphabet)))
-(define (base64-decode str)
- "Decode the base64 encoded string STR and return a bytevector
-containing the decoded data."
- (let* ((n (string-length str))
- (groups (/ n 4)))
- (define (padding? i)
- (eqv? (string-ref str i) #\=))
- (define (decode i)
- (let ((c (string-ref str i)))
- (or (hashv-ref %decode-table c)
- (error 'base64-error "invalid base64 character" c))))
- (define (decode4 group)
- (let ((start (* group 4)))
- (logior (ash (decode start) 18)
- (ash (decode (+ start 1)) 12)
- (ash (decode (+ start 2)) 6)
- (decode (+ start 3)))))
- (unless (exact-integer? groups)
- (error 'base64-error "invalid character count"))
- (let* ((padding (cond
- ((and (padding? (- n 2)) (padding? (- n 1))) 2)
- ((padding? (- n 1)) 1)
- (else 0)))
- (bv (make-bytevector (- (* groups 3) padding))))
- (let loop ((i 0))
- (cond
- ((= i (- groups 1))
- (let ((x (ash (decode4 i) (* padding -8))))
- (bytevector-sint-set! bv (* i 3) x (endianness big) (- 3 padding))
- bv))
- ((< i groups)
- (bytevector-sint-set! bv (* i 3) (decode4 i) (endianness big) 3)
- (loop (+ i 1)))))
- bv)))
+(define alphabet->decoder
+ (let ((cache (make-hash-table)))
+ (lambda (alphabet)
+ (or (hash-ref cache alphabet)
+ (let ((bv (make-bytevector 128)))
+ (alphabet-for-each (lambda (i x)
+ (bytevector-u8-set! bv x i))
+ alphabet)
+ (hash-set! cache alphabet bv)
+ bv)))))
+
+(define* (base64-decode str #:optional (alphabet base64-alphabet))
+ "Decode the base64 encoded string @var{str} using @var{alphabet} and
+return a bytevector containing the decoded data."
+ (define decoder (alphabet->decoder alphabet))
+ (define (decode char) (bytevector-u8-ref decoder (char->integer char)))
+ (call-with-input-string str
+ (lambda (in)
+ (call-with-output-bytevector
+ (lambda (out)
+ (let lp ()
+ (unless (eof-object? (peek-char in))
+ (let* ((a (read-char in))
+ (b (read-char in))
+ (c (read-char in))
+ (d (read-char in)))
+ (cond
+ ((eq? d #\=)
+ (if (eq? c #\=)
+ ;; 2 bytes of padding.
+ (let ((x (logior (ash (decode a) 6) (decode b))))
+ (put-u8 out (ash (logand x #xff0) -4)))
+ ;; 1 byte of padding.
+ (let ((x (logior (ash (decode a) 12)
+ (ash (decode b) 6)
+ (decode c))))
+ (put-u8 out (ash (logand x #x3fc00) -10))
+ (put-u8 out (ash (logand x #x3fc) -2))))
+ ;; Input should be fully consumed at this point.
+ (unless (eof-object? (peek-char in))
+ (error "invalid base64 string" str)))
+ (else
+ (let ((x (logior (ash (decode a) 18)
+ (ash (decode b) 12)
+ (ash (decode c) 6)
+ (decode d))))
+ (put-u8 out (ash (logand x #xff0000) -16))
+ (put-u8 out (ash (logand x #x00ff00) -8))
+ (put-u8 out (logand x #x0000ff))
+ (lp))))))))))))
+
+(define alphabet->encoder
+ (let ((cache (make-hash-table)))
+ (lambda (alphabet)
+ (or (hash-ref cache alphabet)
+ (let ((bv (make-bytevector 64)))
+ (alphabet-for-each (lambda (i x)
+ (bytevector-u8-set! bv i x))
+ alphabet)
+ (hash-set! cache alphabet bv)
+ bv)))))
+
+(define* (base64-encode bv #:optional (alphabet base64-alphabet))
+ "Encode the bytevector @var{bv} to a base64 using
+@var{alphabet} and return a string containing the encoded data."
+ (define encoder (alphabet->encoder alphabet))
+ (define len (bytevector-length bv))
+ (define groups (quotient len 3))
+ (define padding (- 3 (remainder len 3)))
+ (define (encode x) (integer->char (bytevector-u8-ref encoder x)))
+ (call-with-output-string
+ (lambda (port)
+ (let lp ((i 0))
+ (let ((offset (* i 3)))
+ (cond
+ ((< i groups)
+ (let ((x (logior (ash (bytevector-u8-ref bv offset) 16)
+ (ash (bytevector-u8-ref bv (1+ offset)) 8)
+ (bytevector-u8-ref bv (+ offset 2)))))
+ (put-char port (encode (ash (logand x #xfc0000) -18)))
+ (put-char port (encode (ash (logand x #x03f000) -12)))
+ (put-char port (encode (ash (logand x #x000fc0) -6)))
+ (put-char port (encode (logand x #x00003f)))
+ (lp (1+ i))))
+ ((eq? padding 1)
+ (let ((x (logior (ash (bytevector-u8-ref bv offset) 8)
+ (bytevector-u8-ref bv (1+ offset)))))
+ (put-char port (encode (ash (logand x #xfc00) -10)))
+ (put-char port (encode (ash (logand x #x03f0) -4)))
+ (put-char port (encode (ash (logand x #x000f) 2)))
+ (put-char port #\=)))
+ ((eq? padding 2)
+ (let ((x (bytevector-u8-ref bv offset)))
+ (put-char port (encode (ash (logand x #xfc) -2)))
+ (put-char port (encode (ash (logand x #x03) 4)))
+ (put-string port "==")))))))))
diff --git a/tests/base64.scm b/tests/base64.scm
index dba4f3b..c6ebe8f 100644
--- a/tests/base64.scm
+++ b/tests/base64.scm
@@ -21,12 +21,28 @@
(with-tests "base64"
(test-group "base64-decode"
- (test-equal "encoded text no padding"
+ (test-equal "empty string"
+ ""
+ (utf8->string (base64-decode "")))
+ (test-equal "no padding"
"hello!"
(utf8->string (base64-decode "aGVsbG8h")))
- (test-equal "encoded text with one byte of padding"
+ (test-equal "one byte of padding"
"hello"
(utf8->string (base64-decode "aGVsbG8=")))
- (test-equal "encoded text with two bytes of padding"
+ (test-equal "two bytes of padding"
"what's up?"
- (utf8->string (base64-decode "d2hhdCdzIHVwPw==")))))
+ (utf8->string (base64-decode "d2hhdCdzIHVwPw=="))))
+ (test-group "base64-encode"
+ (test-equal "empty string"
+ ""
+ (base64-encode (string->utf8 "")))
+ (test-equal "no padding"
+ "aGVsbG8h"
+ (base64-encode (string->utf8 "hello!")))
+ (test-equal "one byte of padding"
+ "aGVsbG8="
+ (base64-encode (string->utf8 "hello")))
+ (test-equal "two bytes of padding"
+ "d2hhdCdzIHVwPw=="
+ (base64-encode (string->utf8 "what's up?")))))