From 600088a6629ffefefe582d74e63d52a651150e9c Mon Sep 17 00:00:00 2001 From: David Thompson Date: Thu, 18 Mar 2021 09:27:49 -0400 Subject: chapter-2: regular-expressions: Add solution for exercise 2.8. --- chapter-2/2.2-regular-expressions.scm | 151 ++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) (limited to 'chapter-2/2.2-regular-expressions.scm') diff --git a/chapter-2/2.2-regular-expressions.scm b/chapter-2/2.2-regular-expressions.scm index 236d778..21e3aad 100644 --- a/chapter-2/2.2-regular-expressions.scm +++ b/chapter-2/2.2-regular-expressions.scm @@ -217,3 +217,154 @@ ;; Exercise 2.8: Too much nesting + +(r:seq (r:quote "a") (r:dot) (r:quote "c")) +;; => \(\(a\).\(c\)\) +;; Without nesting it would simply be: a.c + +;; Situations that require nesting: +;; - interval expressions of more than one character: (abc){3,5} +;; - alternatives that part of a sequence: abc\(def\|ghi\) + +(define (r:dot) 'dot) +(define (r:bol) 'bol) +(define (r:eol) 'eol) + +(define (dot? expr) + (eq? expr 'dot)) + +(define (bol? expr) + (eq? expr 'bol)) + +(define (eol? expr) + (eq? expr 'eol)) + +(define (r:alt . exprs) + `(alt ,@exprs)) + +(define (alt? expr) + (and (pair? expr) (eq? (car expr) 'alt))) + +(define (seq? expr) + (and (pair? expr) (eq? (car expr) 'seq))) + +(define (r:group expr) + `(group ,expr)) + +(define (group? expr) + (and (pair? expr) (eq? (car expr) 'group))) + +(define (r:seq . exprs) + (if (= (length exprs) 1) + (car exprs) + `(seq ,@(append-map (lambda (expr) + (cond + ((alt? expr) + (list (r:group expr))) + ((seq? expr) + (cdr expr)) + (else + (list expr)))) + exprs)))) + +(define (r:quote string) + (if (= (string-length string) 1) + (string-ref string 0) + (apply r:seq (string->list string)))) + +(define (r:repeat min max expr) + `(interval ,min ,max + ,(if (or (char? expr) + (eol? expr) + (bol? expr) + (dot? expr)) + expr + (r:group expr)))) + +(define (interval? expr) + (and (pair? expr) (eq? (car expr) 'interval))) + +(define (r:char-from string) + (case (string-length string) + ((0) (r:seq)) + ((1) (r:quote string)) + (else + `(char-from ,string)))) + +(define (char-from? expr) + (and (pair? expr) (eq? (car expr) 'char-from))) + +(define (r:char-not-from string) + `(char-not-from ,string)) + +(define (char-not-from? expr) + (and (pair? expr) (eq? (car expr) 'char-not-from))) + +(define (s-regexp->bre expr) + (cond + ((dot? expr) ".") + ((bol? expr) "^") + ((eol? expr) "$") + ((char? expr) + (if (memv expr chars-needing-quoting) + (string #\\ expr) + (string expr))) + ((char-from? expr) + (bracket (list-ref expr 1) + (lambda (members) + (if (lset= eq? '(#\- #\^) members) + '(#\- #\^) + (quote-bracketed-contents members))))) + ((char-not-from? expr) + (bracket (list-ref expr 1) + (lambda (members) + (cons #\^ (quote-bracketed-contents members))))) + ((group? expr) + (string-append "\\(" (s-regexp->bre (list-ref expr 1)) "\\)")) + ((alt? expr) + (let ((subexprs (cdr expr))) + (if (null? subexprs) + "" + (string-concatenate + (cons (s-regexp->bre (car subexprs)) + (append-map (lambda (subexpr) + (list "\\|" (s-regexp->bre subexpr))) + (cdr subexprs))))))) + ((seq? expr) + (string-concatenate + (map s-regexp->bre (cdr expr)))) + ((interval? expr) + (let ((min (list-ref expr 1)) + (max (list-ref expr 2)) + (subexpr (list-ref expr 3))) + (cond + ((not max) + (string-append (s-regexp->bre subexpr) + "\\{" (number->string min) ",\\}")) + ((= max min) + (string-append (s-regexp->bre subexpr) + "\\{" (number->string min) "\\}")) + (else + (string-append (s-regexp->bre subexpr) + "\\{" (number->string min) "," + (number->string max) "\\}"))))) + (else + (error "unsupported expression" expr)))) + +(grep-test (s-regexp->bre (r:quote "abc")) + "abc") +(grep-test (s-regexp->bre (r:seq (r:quote "a") (r:dot) (r:quote "c"))) + "abc" "adc") +(grep-test (s-regexp->bre (r:seq (r:bol) (r:quote "cat") (r:quote "dog") (r:eol))) + "catdog" + '(not "ccatdogg")) +(grep-test (s-regexp->bre (r:repeat 1 2 (r:quote "cat"))) + "cat" "catcat" '(not "dog")) +(grep-test (s-regexp->bre (r:+ (r:quote "abc"))) + "abc" "abcabcabc" '(not "ab")) +(grep-test (s-regexp->bre (r:seq (r:bol) (r:quote "cat") (r:repeat 0 #f (r:dot)) (r:quote "dog") (r:eol))) + "catdog" "catfrogdog" '(not "ccatdogg")) +(grep-test (s-regexp->bre (r:seq (r:quote "frog") (r:alt (r:quote "dog") (r:quote "cat")))) + "frogdog" "frogcat" '(not "frogfrog")) +(grep-test (s-regexp->bre (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog")))) + "catdogcat" "dogcatdogcat" "catcatcatdogdog" '(not "catdogfrogcatcat")) -- cgit v1.2.3