From 3d029d49d2ae1809ae718986f9c9b7e2adf2fd6d Mon Sep 17 00:00:00 2001 From: David Thompson Date: Fri, 19 Aug 2016 07:37:22 -0400 Subject: Switch from Skribe to Markdown. --- posts/sxml-html-guile.md | 501 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 501 insertions(+) create mode 100644 posts/sxml-html-guile.md (limited to 'posts/sxml-html-guile.md') diff --git a/posts/sxml-html-guile.md b/posts/sxml-html-guile.md new file mode 100644 index 0000000..f600d7f --- /dev/null +++ b/posts/sxml-html-guile.md @@ -0,0 +1,501 @@ +title: Rendering HTML with SXML and GNU Guile +date: 2015-04-10 18:00 +tags: gnu, guile, wsu +summary: With a little effort, SXML can be used for HTML templates +--- + +GNU Guile provides modules for working with XML documents called SXML. +SXML provides an elegant way of writing XML documents as s-expressions +that can be easily manipulated in Scheme. Here’s an example: + +```scheme +(sxml->xml '(foo (bar (@ (attr "something"))))) +``` + +```xml + +``` + +I don’t know about you, but I work with HTML documents much more often +than XML. Since HTML is very similar to XML, we should be able to +represent it with SXML, too! + +```scheme +(sxml->xml + '(html + (head + (title "Hello, world!") + (script (@ (src "foo.js")))) + (body + (h1 "Hello!")))) +``` + +```html + + + Hello, world! + + + +

Hello!

+ + +``` + +Note the use of the empty string in `(script (@ (src "foo.js")) "")`. +The output looks correct now, great! But what about the other void +elements? We’ll have to remember to use the empty string hack each +time we use one. That doesn’t sound very elegant. + +Furthermore, text isn’t even escaped properly! + +```scheme +(sxml->xml "Copyright © 2015 David Thompson ") +``` + +```html +Copyright © 2015 David Thompson <davet@gnu.org> +``` + +The `<` and `>` braces were escaped, but `©` should’ve been rendered +as `©`. Why does this fail, too? Is there a bug in SXML? + +There’s no bug. The improper rendering happens because HTML, while +similar to XML, has some different syntax rules. Instead of using +`sxml->xml`, a new procedure that is tailored to the HTML syntax is +needed. Introducing `sxml->html`: + +```scheme +(define* (sxml->html tree #:optional (port (current-output-port))) + "Write the serialized HTML form of TREE to PORT." + (match tree + (() *unspecified*) + (('doctype type) + (doctype->html type port)) + ;; Unescaped, raw HTML output + (('raw html) + (display html port)) + (((? symbol? tag) ('@ attrs ...) body ...) + (element->html tag attrs body port)) + (((? symbol? tag) body ...) + (element->html tag '() body port)) + ((nodes ...) + (for-each (cut sxml->html <> port) nodes)) + ((? string? text) + (string->escaped-html text port)) + ;; Render arbitrary Scheme objects, too. + (obj (object->escaped-html obj port)))) +``` + +In addition to being aware of void elements and escape characters, it +can also render `'(doctype "html")` as ``, or render an +unescaped HTML string using `'(raw "frog & toad")`. If we replace +`sxml->xml` with `sxml->html` in the failing example above we can see +that it does the right thing. + +```scheme +(sxml->html + '((script (@ (src "foo.js"))) + "Copyright © 2015 David Thompson ")) +``` + +```html + +Copyright © 2015 David Thompson <davet@gnu.org> +``` + +Here’s the full version of my `(sxml html)` module. It’s quite brief, +if you don’t count the ~250 lines of escape codes! This code requires +Guile 2.0.11 or greater. + +Happy hacking! + +```scheme +;; Copyright © 2015 David Thompson +;; +;; This library is free software; you can redistribute it and/or +;; modify it under the terms of the GNU Lesser General Public License +;; as published by the Free Software Foundation; either version 3 of +;; the License, or (at your option) any later version. +;; +;; This library is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; Lesser General Public License for more details. +;; +;; You should have received a copy of the GNU Lesser General Public +;; License along with this library. If not, see +;; . + +define-module (sxml html) + #:use-module (sxml simple) + #:use-module (srfi srfi-26) + #:use-module (ice-9 match) + #:use-module (ice-9 format) + #:use-module (ice-9 hash-table) + #:export (sxml->html)) + +define %void-elements + '(area + base + br + col + command + embed + hr + img + input + keygen + link + meta + param + source + track + wbr)) + +define (void-element? tag) + "Return #t if TAG is a void element." + (pair? (memq tag %void-elements))) + +define %escape-chars + (alist->hash-table + '((#\" . "quot") + (#\& . "amp") + (#\' . "apos") + (#\< . "lt") + (#\> . "gt") + (#\¡ . "iexcl") + (#\¢ . "cent") + (#\£ . "pound") + (#\¤ . "curren") + (#\¥ . "yen") + (#\¦ . "brvbar") + (#\§ . "sect") + (#\¨ . "uml") + (#\© . "copy") + (#\ª . "ordf") + (#\« . "laquo") + (#\¬ . "not") + (#\® . "reg") + (#\¯ . "macr") + (#\° . "deg") + (#\± . "plusmn") + (#\² . "sup2") + (#\³ . "sup3") + (#\´ . "acute") + (#\µ . "micro") + (#\¶ . "para") + (#\· . "middot") + (#\¸ . "cedil") + (#\¹ . "sup1") + (#\º . "ordm") + (#\» . "raquo") + (#\¼ . "frac14") + (#\½ . "frac12") + (#\¾ . "frac34") + (#\¿ . "iquest") + (#\À . "Agrave") + (#\Á . "Aacute") + (#\ . "Acirc") + (#\à . "Atilde") + (#\Ä . "Auml") + (#\Å . "Aring") + (#\Æ . "AElig") + (#\Ç . "Ccedil") + (#\È . "Egrave") + (#\É . "Eacute") + (#\Ê . "Ecirc") + (#\Ë . "Euml") + (#\Ì . "Igrave") + (#\Í . "Iacute") + (#\Î . "Icirc") + (#\Ï . "Iuml") + (#\Ð . "ETH") + (#\Ñ . "Ntilde") + (#\Ò . "Ograve") + (#\Ó . "Oacute") + (#\Ô . "Ocirc") + (#\Õ . "Otilde") + (#\Ö . "Ouml") + (#\× . "times") + (#\Ø . "Oslash") + (#\Ù . "Ugrave") + (#\Ú . "Uacute") + (#\Û . "Ucirc") + (#\Ü . "Uuml") + (#\Ý . "Yacute") + (#\Þ . "THORN") + (#\ß . "szlig") + (#\à . "agrave") + (#\á . "aacute") + (#\â . "acirc") + (#\ã . "atilde") + (#\ä . "auml") + (#\å . "aring") + (#\æ . "aelig") + (#\ç . "ccedil") + (#\è . "egrave") + (#\é . "eacute") + (#\ê . "ecirc") + (#\ë . "euml") + (#\ì . "igrave") + (#\í . "iacute") + (#\î . "icirc") + (#\ï . "iuml") + (#\ð . "eth") + (#\ñ . "ntilde") + (#\ò . "ograve") + (#\ó . "oacute") + (#\ô . "ocirc") + (#\õ . "otilde") + (#\ö . "ouml") + (#\÷ . "divide") + (#\ø . "oslash") + (#\ù . "ugrave") + (#\ú . "uacute") + (#\û . "ucirc") + (#\ü . "uuml") + (#\ý . "yacute") + (#\þ . "thorn") + (#\ÿ . "yuml") + (#\Œ . "OElig") + (#\œ . "oelig") + (#\Š . "Scaron") + (#\š . "scaron") + (#\Ÿ . "Yuml") + (#\ƒ . "fnof") + (#\ˆ . "circ") + (#\˜ . "tilde") + (#\Α . "Alpha") + (#\Β . "Beta") + (#\Γ . "Gamma") + (#\Δ . "Delta") + (#\Ε . "Epsilon") + (#\Ζ . "Zeta") + (#\Η . "Eta") + (#\Θ . "Theta") + (#\Ι . "Iota") + (#\Κ . "Kappa") + (#\Λ . "Lambda") + (#\Μ . "Mu") + (#\Ν . "Nu") + (#\Ξ . "Xi") + (#\Ο . "Omicron") + (#\Π . "Pi") + (#\Ρ . "Rho") + (#\Σ . "Sigma") + (#\Τ . "Tau") + (#\Υ . "Upsilon") + (#\Φ . "Phi") + (#\Χ . "Chi") + (#\Ψ . "Psi") + (#\Ω . "Omega") + (#\α . "alpha") + (#\β . "beta") + (#\γ . "gamma") + (#\δ . "delta") + (#\ε . "epsilon") + (#\ζ . "zeta") + (#\η . "eta") + (#\θ . "theta") + (#\ι . "iota") + (#\κ . "kappa") + (#\λ . "lambda") + (#\μ . "mu") + (#\ν . "nu") + (#\ξ . "xi") + (#\ο . "omicron") + (#\π . "pi") + (#\ρ . "rho") + (#\ς . "sigmaf") + (#\σ . "sigma") + (#\τ . "tau") + (#\υ . "upsilon") + (#\φ . "phi") + (#\χ . "chi") + (#\ψ . "psi") + (#\ω . "omega") + (#\ϑ . "thetasym") + (#\ϒ . "upsih") + (#\ϖ . "piv") + (#\  . "ensp") + (#\  . "emsp") + (#\  . "thinsp") + (#\– . "ndash") + (#\— . "mdash") + (#\‘ . "lsquo") + (#\’ . "rsquo") + (#\‚ . "sbquo") + (#\“ . "ldquo") + (#\” . "rdquo") + (#\„ . "bdquo") + (#\† . "dagger") + (#\‡ . "Dagger") + (#\• . "bull") + (#\… . "hellip") + (#\‰ . "permil") + (#\′ . "prime") + (#\″ . "Prime") + (#\‹ . "lsaquo") + (#\› . "rsaquo") + (#\‾ . "oline") + (#\⁄ . "frasl") + (#\€ . "euro") + (#\ℑ . "image") + (#\℘ . "weierp") + (#\ℜ . "real") + (#\™ . "trade") + (#\ℵ . "alefsym") + (#\← . "larr") + (#\↑ . "uarr") + (#\→ . "rarr") + (#\↓ . "darr") + (#\↔ . "harr") + (#\↵ . "crarr") + (#\⇐ . "lArr") + (#\⇑ . "uArr") + (#\⇒ . "rArr") + (#\⇓ . "dArr") + (#\⇔ . "hArr") + (#\∀ . "forall") + (#\∂ . "part") + (#\∃ . "exist") + (#\∅ . "empty") + (#\∇ . "nabla") + (#\∈ . "isin") + (#\∉ . "notin") + (#\∋ . "ni") + (#\∏ . "prod") + (#\∑ . "sum") + (#\− . "minus") + (#\∗ . "lowast") + (#\√ . "radic") + (#\∝ . "prop") + (#\∞ . "infin") + (#\∠ . "ang") + (#\∧ . "and") + (#\∨ . "or") + (#\∩ . "cap") + (#\∪ . "cup") + (#\∫ . "int") + (#\∴ . "there4") + (#\∼ . "sim") + (#\≅ . "cong") + (#\≈ . "asymp") + (#\≠ . "ne") + (#\≡ . "equiv") + (#\≤ . "le") + (#\≥ . "ge") + (#\⊂ . "sub") + (#\⊃ . "sup") + (#\⊄ . "nsub") + (#\⊆ . "sube") + (#\⊇ . "supe") + (#\⊕ . "oplus") + (#\⊗ . "otimes") + (#\⊥ . "perp") + (#\⋅ . "sdot") + (#\⋮ . "vellip") + (#\⌈ . "lceil") + (#\⌉ . "rceil") + (#\⌊ . "lfloor") + (#\⌋ . "rfloor") + (#\〈 . "lang") + (#\〉 . "rang") + (#\◊ . "loz") + (#\♠ . "spades") + (#\♣ . "clubs") + (#\♥ . "hearts") + (#\♦ . "diams")))) + +define (string->escaped-html s port) + "Write the HTML escaped form of S to PORT." + (define (escape c) + (let ((escaped (hash-ref %escape-chars c))) + (if escaped + (format port "&~a;" escaped) + (display c port)))) + (string-for-each escape s)) + +define (object->escaped-html obj port) + "Write the HTML escaped form of OBJ to PORT." + (string->escaped-html + (call-with-output-string (cut display obj <>)) + port)) + +define (attribute-value->html value port) + "Write the HTML escaped form of VALUE to PORT." + (if (string? value) + (string->escaped-html value port) + (object->escaped-html value port))) + +define (attribute->html attr value port) + "Write ATTR and VALUE to PORT." + (format port "~a=\"" attr) + (attribute-value->html value port) + (display #\" port)) + +define (element->html tag attrs body port) + "Write the HTML TAG to PORT, where TAG has the attributes in the +ist ATTRS and the child nodes in BODY." + (format port "<~a" tag) + (for-each (match-lambda + ((attr value) + (display #\space port) + (attribute->html attr value port))) + attrs) + (if (and (null? body) (void-element? tag)) + (display " />" port) + (begin + (display #\> port) + (for-each (cut sxml->html <> port) body) + (format port "" tag)))) + +define (doctype->html doctype port) + (format port "" doctype)) + +define* (sxml->html tree #:optional (port (current-output-port))) + "Write the serialized HTML form of TREE to PORT." + (match tree + (() *unspecified*) + (('doctype type) + (doctype->html type port)) + ;; Unescaped, raw HTML output + (('raw html) + (display html port)) + (((? symbol? tag) ('@ attrs ...) body ...) + (element->html tag attrs body port)) + (((? symbol? tag) body ...) + (element->html tag '() body port)) + ((nodes ...) + (for-each (cut sxml->html <> port) nodes)) + ((? string? text) + (string->escaped-html text port)) + ;; Render arbitrary Scheme objects, too. + (obj (object->escaped-html obj port)))) +``` -- cgit v1.2.3