;;; Copyright © 2021 Gerald Sussman and Chris Hanson ;;; Copyright © 2021 David Thompson ;;; ;;; This program is free software: you can redistribute it and/or ;;; modify it under the terms of the GNU General Public License as ;;; published by the Free Software Foundation, either version 3 of the ;;; License, or (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with this program. If not, see ;;; . (use-modules (ice-9 popen) (srfi srfi-1)) ;; 2.2.2 Implementation of the translator (define-syntax-rule (assert cond) (unless cond (error "assertion failed" 'cond))) ;; I am excluding the write-bourne-shell-grep-command and ;; bourne-shell-command-string procedures in the book in favor of ;; using Guile's pipe API. open-pipe* does not open a shell but runs ;; grep directly, eliminating the need to do shell quoting. The book ;; asks "Are we having fun yet?" to which I say "yes, but I draw the ;; line at shell quoting." (define (grep regexp string) (let ((pipe (open-pipe* OPEN_WRITE "grep" "-e" regexp))) (display string pipe) (newline pipe) (zero? (status:exit-val (close-pipe pipe))))) (define (grep-test regexp . specs) (for-each (lambda (spec) (assert (if (and (pair? spec) (eq? (car spec) 'not)) (not (grep regexp (list-ref spec 1))) (grep regexp spec)))) specs)) (define (r:dot) ".") (define (r:bol) "^") (define (r:eol) "$") (define (r:seq . exprs) (string-append "\\(" (apply string-append exprs) "\\)")) (define chars-needing-quoting '(#\. #\[ #\\ #\^ #\$ #\*)) (define (r:quote string) (r:seq (list->string (append-map (lambda (char) (if (memv char chars-needing-quoting) (list #\\ char) (list char))) (string->list string))))) (grep-test (r:seq (r:quote "a") (r:dot) (r:quote "c")) "abc") (define (r:alt . exprs) (if (pair? exprs) (apply r:seq (cons (car exprs) (append-map (lambda (expr) (list "\\|" expr)) (cdr exprs)))) (r:seq))) (grep-test (r:alt (r:quote "foo") (r:quote "bar") (r:quote "baz")) "foo" "bar" "baz" '(not "frob")) (define (r:repeat min max expr) (apply r:seq (append (make-list min expr) (cond ((not max) (list expr "*")) ((= max min) '()) (else (make-list (- max min) (r:alt expr ""))))))) (grep-test (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog"))) "catdogcat" "catcatdogdog" "dogdogcatdogdog" '(not "catdogfrog")) (define (bracket string procedure) (list->string (append '(#\[) (procedure (string->list string)) '(#\])))) (define chars-needing-quoting-in-brackets '(#\] #\^ #\-)) (define (quote-bracketed-contents members) (define (optional char) (if (memv char members) (list char) '())) (append (optional #\]) (remove (lambda (c) (memv c chars-needing-quoting-in-brackets)) members) (optional #\^) (optional #\-))) (define (r:char-from string) (case (string-length string) ((0) (r:seq)) ((1) (r:quote string)) (else (bracket string (lambda (members) (if (lset= eq? '(#\- #\^) members) '(#\- #\^) (quote-bracketed-contents members))))))) (define (r:char-not-from string) (bracket string (lambda (members) (cons #\^ (quote-bracketed-contents members))))) ;; Exercise 2.6: Adding * and + to regular expressions (define (r:* expr) (r:repeat 0 #f expr)) (define (r:+ expr) (r:repeat 1 #f expr)) (grep-test (r:* (r:quote "foo")) "" "foo" "bar") (grep-test (r:seq (r:quote "foo") (r:* (r:quote "bar"))) "foo" "foobar" '(not "")) (grep-test (r:+ (r:quote "foo")) '(not "") "foo" "foofoo") (grep-test (r:seq (r:quote "foo") (r:+ (r:quote "bar"))) '(not "foo") "foobar" "foobarbar") ;; Exercise 2.7: A bug, one bad joke, two tweaks, and a revelation ;; Helpful context: The POSIX standard chapter on regular expressions: ;; https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html ;; a ;; Unbounded recursion! ;; b ;; Eva's proposal runs in linear time and produces a relatively ;; concise regular expression. Alyssa's proposal would produce a ;; massively long expression that specifies, in full detail, all of ;; the permutations that should match. Passing #f for the max ;; argument would result in unbounded recursion. Leaving that issue ;; aside, the time complexity also suffers. In addition to looping (- ;; max min) times, each iteration of this outer loop would need an ;; additional inner loop requiring min, min + 1, min + 2, ..., max ;; iterations. ;; c ;; Ben's propsoal of using interval expression has numerous ;; advantages. Interval expressions are a feature of the underlying ;; language so the combinator implementation is straightforward, ;; produces small expression strings, and is constant time rather than ;; linear time. Eva's proposal requires targeting the ERE language ;; rather than BRE, which is incompatible with BRE, so other aspects ;; of the implemenation would likely have to be changed. ;; d (define (r:repeat min max expr) (cond ((not max) (string-append expr "\\{" (number->string min) ",\\}")) ((= max min) (string-append expr "\\{" (number->string min) "\\}")) (else (string-append expr "\\{" (number->string min) "," (number->string max) "\\}")))) (grep-test (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog"))) "catdogcat" "catcatdogdog" "dogdogcatdogdog" '(not "catdogfrog")) (grep-test (r:repeat 2 #f (r:alt (r:quote "cat") (r:quote "dog"))) "catdog" "catdogdog" '(not "cat") '(not "dog") '(not "dogfrog")) (grep-test (r:repeat 2 2 (r:alt (r:quote "cat") (r:quote "dog"))) "catdog" "catcat" "dogdog" '(not "catfrogdog") '(not "cat") '(not "dog") '(not "dogfrog")) ;; Exercise 2.8: Too much nesting (r:seq (r:quote "a") (r:dot) (r:quote "c")) ;; => \(\(a\).\(c\)\) ;; Without nesting it would simply be: a.c ;; Situations that require nesting: ;; - interval expressions of more than one character: (abc){3,5} ;; - alternatives that part of a sequence: abc\(def\|ghi\) (define (r:dot) 'dot) (define (r:bol) 'bol) (define (r:eol) 'eol) (define (dot? expr) (eq? expr 'dot)) (define (bol? expr) (eq? expr 'bol)) (define (eol? expr) (eq? expr 'eol)) (define (r:alt . exprs) `(alt ,@exprs)) (define (alt? expr) (and (pair? expr) (eq? (car expr) 'alt))) (define (seq? expr) (and (pair? expr) (eq? (car expr) 'seq))) (define (r:group expr) `(group ,expr)) (define (group? expr) (and (pair? expr) (eq? (car expr) 'group))) (define (r:seq . exprs) (if (= (length exprs) 1) (car exprs) `(seq ,@(append-map (lambda (expr) (cond ((alt? expr) (list (r:group expr))) ((seq? expr) (cdr expr)) (else (list expr)))) exprs)))) (define (r:quote string) (if (= (string-length string) 1) (string-ref string 0) (apply r:seq (string->list string)))) (define (r:repeat min max expr) `(interval ,min ,max ,(if (or (char? expr) (eol? expr) (bol? expr) (dot? expr)) expr (r:group expr)))) (define (interval? expr) (and (pair? expr) (eq? (car expr) 'interval))) (define (r:char-from string) (case (string-length string) ((0) (r:seq)) ((1) (r:quote string)) (else `(char-from ,string)))) (define (char-from? expr) (and (pair? expr) (eq? (car expr) 'char-from))) (define (r:char-not-from string) `(char-not-from ,string)) (define (char-not-from? expr) (and (pair? expr) (eq? (car expr) 'char-not-from))) (define (s-regexp->bre expr) (cond ((dot? expr) ".") ((bol? expr) "^") ((eol? expr) "$") ((char? expr) (if (memv expr chars-needing-quoting) (string #\\ expr) (string expr))) ((char-from? expr) (bracket (list-ref expr 1) (lambda (members) (if (lset= eq? '(#\- #\^) members) '(#\- #\^) (quote-bracketed-contents members))))) ((char-not-from? expr) (bracket (list-ref expr 1) (lambda (members) (cons #\^ (quote-bracketed-contents members))))) ((group? expr) (string-append "\\(" (s-regexp->bre (list-ref expr 1)) "\\)")) ((alt? expr) (let ((subexprs (cdr expr))) (if (null? subexprs) "" (string-concatenate (cons (s-regexp->bre (car subexprs)) (append-map (lambda (subexpr) (list "\\|" (s-regexp->bre subexpr))) (cdr subexprs))))))) ((seq? expr) (string-concatenate (map s-regexp->bre (cdr expr)))) ((interval? expr) (let ((min (list-ref expr 1)) (max (list-ref expr 2)) (subexpr (list-ref expr 3))) (cond ((not max) (string-append (s-regexp->bre subexpr) "\\{" (number->string min) ",\\}")) ((= max min) (string-append (s-regexp->bre subexpr) "\\{" (number->string min) "\\}")) (else (string-append (s-regexp->bre subexpr) "\\{" (number->string min) "," (number->string max) "\\}"))))) (else (error "unsupported expression" expr)))) (grep-test (s-regexp->bre (r:quote "abc")) "abc") (grep-test (s-regexp->bre (r:seq (r:quote "a") (r:dot) (r:quote "c"))) "abc" "adc") (grep-test (s-regexp->bre (r:seq (r:bol) (r:quote "cat") (r:quote "dog") (r:eol))) "catdog" '(not "ccatdogg")) (grep-test (s-regexp->bre (r:repeat 1 2 (r:quote "cat"))) "cat" "catcat" '(not "dog")) (grep-test (s-regexp->bre (r:+ (r:quote "abc"))) "abc" "abcabcabc" '(not "ab")) (grep-test (s-regexp->bre (r:seq (r:bol) (r:quote "cat") (r:repeat 0 #f (r:dot)) (r:quote "dog") (r:eol))) "catdog" "catfrogdog" '(not "ccatdogg")) (grep-test (s-regexp->bre (r:seq (r:quote "frog") (r:alt (r:quote "dog") (r:quote "cat")))) "frogdog" "frogcat" '(not "frogfrog")) (grep-test (s-regexp->bre (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog")))) "catdogcat" "dogcatdogcat" "catcatcatdogdog" '(not "catdogfrogcatcat")) ;; Exercise 2.9: Back-references ;; Exercise 2.10: Standards? ;; a ;; Differences between BREs and EREs: ;; - BREs use \( and \) for grouping, EREs use ( and ) ;; - BREs use \{ and \} for intervals, EREs use { and } ;; - BREs do not support alternation (non-standard GNU extension ;; provides it), but EREs do. ;; - GNU-extended BREs use \| for alternation, EREs use | ;; b ;; c