summaryrefslogtreecommitdiff
path: root/chapter-2
diff options
context:
space:
mode:
Diffstat (limited to 'chapter-2')
-rw-r--r--chapter-2/2.2-regular-expressions.scm151
1 files changed, 151 insertions, 0 deletions
diff --git a/chapter-2/2.2-regular-expressions.scm b/chapter-2/2.2-regular-expressions.scm
index 236d778..21e3aad 100644
--- a/chapter-2/2.2-regular-expressions.scm
+++ b/chapter-2/2.2-regular-expressions.scm
@@ -217,3 +217,154 @@
;; Exercise 2.8: Too much nesting
+
+(r:seq (r:quote "a") (r:dot) (r:quote "c"))
+;; => \(\(a\).\(c\)\)
+;; Without nesting it would simply be: a.c
+
+;; Situations that require nesting:
+;; - interval expressions of more than one character: (abc){3,5}
+;; - alternatives that part of a sequence: abc\(def\|ghi\)
+
+(define (r:dot) 'dot)
+(define (r:bol) 'bol)
+(define (r:eol) 'eol)
+
+(define (dot? expr)
+ (eq? expr 'dot))
+
+(define (bol? expr)
+ (eq? expr 'bol))
+
+(define (eol? expr)
+ (eq? expr 'eol))
+
+(define (r:alt . exprs)
+ `(alt ,@exprs))
+
+(define (alt? expr)
+ (and (pair? expr) (eq? (car expr) 'alt)))
+
+(define (seq? expr)
+ (and (pair? expr) (eq? (car expr) 'seq)))
+
+(define (r:group expr)
+ `(group ,expr))
+
+(define (group? expr)
+ (and (pair? expr) (eq? (car expr) 'group)))
+
+(define (r:seq . exprs)
+ (if (= (length exprs) 1)
+ (car exprs)
+ `(seq ,@(append-map (lambda (expr)
+ (cond
+ ((alt? expr)
+ (list (r:group expr)))
+ ((seq? expr)
+ (cdr expr))
+ (else
+ (list expr))))
+ exprs))))
+
+(define (r:quote string)
+ (if (= (string-length string) 1)
+ (string-ref string 0)
+ (apply r:seq (string->list string))))
+
+(define (r:repeat min max expr)
+ `(interval ,min ,max
+ ,(if (or (char? expr)
+ (eol? expr)
+ (bol? expr)
+ (dot? expr))
+ expr
+ (r:group expr))))
+
+(define (interval? expr)
+ (and (pair? expr) (eq? (car expr) 'interval)))
+
+(define (r:char-from string)
+ (case (string-length string)
+ ((0) (r:seq))
+ ((1) (r:quote string))
+ (else
+ `(char-from ,string))))
+
+(define (char-from? expr)
+ (and (pair? expr) (eq? (car expr) 'char-from)))
+
+(define (r:char-not-from string)
+ `(char-not-from ,string))
+
+(define (char-not-from? expr)
+ (and (pair? expr) (eq? (car expr) 'char-not-from)))
+
+(define (s-regexp->bre expr)
+ (cond
+ ((dot? expr) ".")
+ ((bol? expr) "^")
+ ((eol? expr) "$")
+ ((char? expr)
+ (if (memv expr chars-needing-quoting)
+ (string #\\ expr)
+ (string expr)))
+ ((char-from? expr)
+ (bracket (list-ref expr 1)
+ (lambda (members)
+ (if (lset= eq? '(#\- #\^) members)
+ '(#\- #\^)
+ (quote-bracketed-contents members)))))
+ ((char-not-from? expr)
+ (bracket (list-ref expr 1)
+ (lambda (members)
+ (cons #\^ (quote-bracketed-contents members)))))
+ ((group? expr)
+ (string-append "\\(" (s-regexp->bre (list-ref expr 1)) "\\)"))
+ ((alt? expr)
+ (let ((subexprs (cdr expr)))
+ (if (null? subexprs)
+ ""
+ (string-concatenate
+ (cons (s-regexp->bre (car subexprs))
+ (append-map (lambda (subexpr)
+ (list "\\|" (s-regexp->bre subexpr)))
+ (cdr subexprs)))))))
+ ((seq? expr)
+ (string-concatenate
+ (map s-regexp->bre (cdr expr))))
+ ((interval? expr)
+ (let ((min (list-ref expr 1))
+ (max (list-ref expr 2))
+ (subexpr (list-ref expr 3)))
+ (cond
+ ((not max)
+ (string-append (s-regexp->bre subexpr)
+ "\\{" (number->string min) ",\\}"))
+ ((= max min)
+ (string-append (s-regexp->bre subexpr)
+ "\\{" (number->string min) "\\}"))
+ (else
+ (string-append (s-regexp->bre subexpr)
+ "\\{" (number->string min) ","
+ (number->string max) "\\}")))))
+ (else
+ (error "unsupported expression" expr))))
+
+(grep-test (s-regexp->bre (r:quote "abc"))
+ "abc")
+(grep-test (s-regexp->bre (r:seq (r:quote "a") (r:dot) (r:quote "c")))
+ "abc" "adc")
+(grep-test (s-regexp->bre (r:seq (r:bol) (r:quote "cat") (r:quote "dog") (r:eol)))
+ "catdog"
+ '(not "ccatdogg"))
+(grep-test (s-regexp->bre (r:repeat 1 2 (r:quote "cat")))
+ "cat" "catcat" '(not "dog"))
+(grep-test (s-regexp->bre (r:+ (r:quote "abc")))
+ "abc" "abcabcabc" '(not "ab"))
+(grep-test (s-regexp->bre (r:seq (r:bol) (r:quote "cat") (r:repeat 0 #f (r:dot)) (r:quote "dog") (r:eol)))
+ "catdog" "catfrogdog" '(not "ccatdogg"))
+(grep-test (s-regexp->bre (r:seq (r:quote "frog") (r:alt (r:quote "dog") (r:quote "cat"))))
+ "frogdog" "frogcat" '(not "frogfrog"))
+(grep-test (s-regexp->bre (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog"))))
+ "catdogcat" "dogcatdogcat" "catcatcatdogdog" '(not "catdogfrogcatcat"))