summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Thompson <dthompson2@worcester.edu>2016-02-16 12:45:26 -0500
committerDavid Thompson <dthompson2@worcester.edu>2016-02-16 12:45:26 -0500
commitb2a796eb98638e2710a4f6ae687ca3757f7a3e8a (patch)
tree1ce26efbebd69df4a98012e0259b027295639ca0
parent26eaa6e5978b02dedfa7aae7f10be3b0c4d3cb15 (diff)
Rewrite highlighters in terms of lexers.
-rw-r--r--syntax-highlight.scm46
-rw-r--r--syntax-highlight/scheme.scm156
-rw-r--r--syntax-highlight/xml.scm150
3 files changed, 158 insertions, 194 deletions
diff --git a/syntax-highlight.scm b/syntax-highlight.scm
index 091eed2..4326a4c 100644
--- a/syntax-highlight.scm
+++ b/syntax-highlight.scm
@@ -23,36 +23,34 @@
(define-module (syntax-highlight)
#:use-module (ice-9 match)
+ #:use-module (ice-9 rdelim)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
- #:use-module (srfi srfi-41)
- #:use-module (syntax-highlight utils)
+ #:use-module (syntax-highlight lexers)
#:export (highlight
highlights->sxml))
-(define* (highlight highlighter #:optional (stream (current-input-port)))
- "Apply HIGHLIGHTER, a syntax highlighting procedure, to STREAM.
-STREAM may be an open port, string, or SRFI-41 character stream. If
-STREAM is not specified, characters are read from the current input
-port."
- (let-values (((result remaining)
- (highlighter (cond
- ((port? stream)
- (port->stream stream))
- ((string? stream)
- (string->stream stream))
- ((stream? stream)
- stream)
- (else
- (error "Cannot convert to stream: " stream))))))
- ;; If there's a remainder, it means that parsing failed. We want
- ;; to preserve *all* of the input text, even if it is invalid.
- ;; So, we take the stuff that could be parsed and tack on the
- ;; stuff that wasn't.
- (if (stream-null? remaining)
- result
- (append result (list (stream->string remaining))))))
+(define (object->string obj)
+ (cond
+ ((input-port? obj)
+ (read-string obj))
+ ((string? obj)
+ obj)
+ (else
+ (error "not an input port or string: "
+ obj))))
+
+(define object->cursor
+ (compose string->cursor object->string))
+
+(define* (highlight lexer #:optional (string-or-port (current-input-port)))
+ "Apply LEXER to STRING-OR-PORT, a string or an open input port. If
+STRING-OR-PORT is not specified, characters are read from the current
+input port."
+ (let-values (((result remainder)
+ (lexer empty-tokens (object->cursor string-or-port))))
+ (and result (tokens->list result))))
(define (highlights->sxml highlights)
"Convert HIGHLIGHTS, a list of syntax highlighting expressions, into
diff --git a/syntax-highlight/scheme.scm b/syntax-highlight/scheme.scm
index 5b906c7..2571e29 100644
--- a/syntax-highlight/scheme.scm
+++ b/syntax-highlight/scheme.scm
@@ -22,113 +22,87 @@
;;; Code:
(define-module (syntax-highlight scheme)
+ #:use-module (ice-9 match)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
- #:use-module (srfi srfi-41)
- #:use-module (syntax-highlight parsers)
+ #:use-module (srfi srfi-26)
+ #:use-module (syntax-highlight lexers)
#:export (%default-special-symbols
%default-special-regexps
- make-scheme-highlighter
- scheme-highlighter))
+ make-scheme-lexer
+ lex-scheme))
(define char-set:lisp-delimiters
(char-set-union char-set:whitespace
(char-set #\( #\) #\[ #\] #\{ #\})))
-(define (lisp-delimiter? char)
- (char-set-contains? char-set:lisp-delimiters char))
-
-(define parse-symbol-chars
- (parse-char-set
- (char-set-complement char-set:lisp-delimiters)))
-
-(define (parse-specials special-words)
- "Create a parser for SPECIAL-WORDS, a list of important terms for a
-language."
- (define (special word)
- (let ((parser (tagged-parser 'special (parse-string word))))
- (lambda (stream)
- (let-values (((result rest-of-stream) (parser stream)))
- (if (and result (lisp-delimiter? (stream-car stream)))
- (values result rest-of-stream)
- (parse-fail stream))))))
-
- (fold parse-either parse-fail (map special special-words)))
-
-(define (parse-specials/regexp special-regexps)
- (let ((merged-regexp
- (string-join (map (lambda (regexp)
- (string-append "(" regexp ")"))
- special-regexps)
- "|")))
- (tagged-parser 'special
- (parse-regexp merged-regexp parse-symbol-chars))))
-
-(define (parse-openers openers)
- (define (open opener)
- (tagged-parser 'open (parse-string opener)))
-
- (fold parse-either parse-fail (map open openers)))
-
-(define (parse-closers closers)
- (define (close closer)
- (tagged-parser 'close (parse-string closer)))
-
- (fold parse-either parse-fail (map close closers)))
-
-(define parse-symbol
- (tagged-parser 'symbol
- (parse-char-set
- (char-set-complement char-set:lisp-delimiters))))
-
-(define parse-keyword
- (tagged-parser 'keyword
- (parse-map string-concatenate
- (parse-each (parse-string "#:")
- parse-symbol-chars))))
-
-(define parse-string-literal
- (tagged-parser 'string (parse-delimited "\"")))
-
-(define parse-comment
- (tagged-parser 'comment (parse-delimited ";" #:until "\n")))
-
-(define parse-quoted-symbol
- (tagged-parser 'symbol (parse-delimited "#{" #:until "}#")))
+(define char-set:lisp-symbol
+ (char-set-complement char-set:lisp-delimiters))
(define %default-special-symbols
- '("define" "begin" "call-with-current-continuation" "call/cc"
+ '("begin" "call-with-current-continuation" "call/cc"
"call-with-input-file" "call-with-output-file"
"case" "cond"
"do" "else" "if"
"lambda" "λ"
"let" "let*" "let-syntax" "letrec" "letrec-syntax"
- "export" "import" "library" "define-module" "use-module"
"let-values" "let*-values"
+ "export" "import" "library" "use-module"
"and" "or"
"delay" "force"
- "map" "for-each"
- "syntax" "syntax-rules"))
-
-(define %default-special-regexps
- '("^define"))
-
-(define* (make-scheme-highlighter special-symbols special-regexps)
- "Create a syntax highlighting procedure for Scheme that associates
-the 'special' tag for symbols appearing in the list SPECIAL-SYMBOLS or
-matching a regular expression in SPECIAL-REGEXPS."
- (parse-many
- (parse-any parse-whitespace
- (parse-openers '("(" "[" "{"))
- (parse-closers '(")" "]" "}"))
- (parse-specials special-symbols)
- (parse-specials/regexp special-regexps)
- parse-string-literal
- parse-comment
- parse-keyword
- parse-quoted-symbol
- parse-symbol)))
-
-(define scheme-highlighter
- (make-scheme-highlighter %default-special-symbols
- %default-special-regexps))
+ "map" "for-each"))
+
+(define %default-special-prefixes
+ '("define" "syntax"))
+
+(define (lex-special-symbol sym)
+ (lex-filter (lambda (str)
+ (string=? sym str))
+ (lex-char-set char-set:lisp-symbol)))
+
+(define (lex-special-symbol symbols prefixes)
+ (lex-filter (lambda (str)
+ (or (any (cut string=? symbols <>) symbols)
+ (any (cut string-prefix? <> str) prefixes)))
+ (lex-char-set char-set:lisp-symbol)))
+
+(define (lex-map2 proc lexer)
+ (lambda (tokens cursor)
+ (let-values (((result remainder) (lexer tokens cursor)))
+ (if result
+ (match (token-take result 2)
+ ((second first) ; accumulator tokens are in reverse order
+ (values (token-add (token-drop result 2)
+ (proc first second))
+ remainder)))
+ (fail)))))
+
+(define (make-scheme-lexer special-symbols special-prefixes)
+ "Return a lexer that highlights Scheme source code. Tag strings
+that are in SPECIAL-SYMBOLS or match one of the string prefixes in
+SPECIAL-PREFIXES with the 'special' tag."
+ (lex-consume
+ (lex-any (lex-char-set char-set:whitespace)
+ (lex-tag 'open (lex-any* (map lex-string '("(" "[" "{"))))
+ (lex-tag 'close (lex-any* (map lex-string '(")" "]" "}"))))
+ (lex-tag 'comment (lex-delimited ";" #:until "\n"))
+ (lex-tag 'multi-line-comment
+ (lex-delimited "#|" #:until "|#" #:nested? #t))
+ (lex-tag 'special
+ (lex-filter (lambda (str)
+ (or (any (cut string=? <> str)
+ special-symbols)
+ (any (cut string-prefix? <> str)
+ special-prefixes)))
+ (lex-char-set char-set:lisp-symbol)))
+ (lex-tag 'string (lex-delimited "\""))
+ (lex-tag 'keyword
+ (lex-map2 string-append
+ (lex-all (lex-string "#:")
+ (lex-char-set char-set:lisp-symbol))))
+ (lex-tag 'symbol
+ (lex-any (lex-delimited "#{" #:until "}#")
+ (lex-char-set char-set:lisp-symbol))))))
+
+(define lex-scheme
+ (make-scheme-lexer %default-special-symbols %default-special-prefixes))
diff --git a/syntax-highlight/xml.scm b/syntax-highlight/xml.scm
index 87b661b..d91a8e5 100644
--- a/syntax-highlight/xml.scm
+++ b/syntax-highlight/xml.scm
@@ -26,86 +26,78 @@
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-41)
- #:use-module (syntax-highlight parsers)
- #:export (xml-highlighter))
-
-(define (flatten+compact highlights)
- (define tagged?
- (match-lambda
- (((? symbol?) _) #t)
- (_ #f)))
-
- (let loop ((highlights highlights)
- (result '()))
- (match highlights
- (() (reverse result))
- (("" . tail)
- (loop tail result))
- (((or (? string? head) (? tagged? head)) . tail)
- (loop tail (cons head result)))
- ((head . tail)
- (loop tail (append (reverse (flatten+compact head)) result))))))
-
-(define parse-comment
- (tagged-parser 'comment (parse-delimited "<!--" #:until "-->")))
-
-(define parse-xml-symbol
- (parse-char-set
- (char-set-union char-set:letter+digit
- (char-set #\. #\- #\_ #\:))))
-
-(define parse-element-name
- (tagged-parser 'element parse-xml-symbol))
-
-(define parse-whitespace-maybe
- (parse-maybe parse-whitespace))
-
-(define parse-attribute
- (parse-each (tagged-parser 'attribute parse-xml-symbol)
- parse-whitespace-maybe
- (parse-string "=")
- parse-whitespace-maybe
- (tagged-parser 'string (parse-delimited "\""))))
-
-(define parse-open-tag
- (parse-each (tagged-parser 'open (parse-any (parse-string "<?")
- (parse-string "<")))
- parse-element-name
- (parse-many
- (parse-any (parse-each
- parse-whitespace
- parse-attribute)
- parse-whitespace))
- (tagged-parser 'close (parse-any (parse-string ">")
- (parse-string "/>")
- (parse-string "?>")))))
-
-(define parse-close-tag
- (parse-each (tagged-parser 'open (parse-string "</"))
- parse-element-name
- (tagged-parser 'close (parse-string ">"))))
+ #:use-module (syntax-highlight lexers)
+ #:export (lex-xml))
(define char-set:not-whitespace
(char-set-complement char-set:whitespace))
-(define parse-tag
- (parse-each (parse-string "<")
- (parse-char-set
- (char-set-delete char-set:not-whitespace #\>))
- (parse-string ">")))
-
-(define parse-entity
- (tagged-parser 'entity (parse-delimited "&" #:until ";")))
-
-(define parse-text
- (parse-char-set
- (char-set-difference char-set:full (char-set #\<))))
-
-(define xml-highlighter
- (parse-map flatten+compact
- (parse-many
- (parse-any parse-comment
- parse-close-tag
- parse-open-tag
- parse-entity
- parse-text))))
+(define char-set:xml-symbol
+ (char-set-union char-set:letter+digit
+ (char-set #\. #\- #\_ #\:)))
+
+(define lex-comment
+ (lex-tag 'comment (lex-delimited "<!--" #:until "-->")))
+
+(define lex-xml-symbol
+ (lex-char-set char-set:xml-symbol))
+
+(define lex-element-name
+ (lex-tag 'element lex-xml-symbol))
+
+(define lex-whitespace-maybe
+ (lex-maybe lex-whitespace))
+
+(define lex-attribute
+ (lex-all (lex-tag 'attribute lex-xml-symbol)
+ lex-whitespace-maybe
+ (lex-string "=")
+ lex-whitespace-maybe
+ (lex-tag 'string (lex-delimited "\""))))
+
+(define lex-open-tag
+ (lex-all (lex-tag 'open (lex-any (lex-string "<?")
+ (lex-string "<")))
+ lex-element-name
+ (lex-zero-or-more
+ (lex-any (lex-all lex-whitespace
+ lex-attribute)
+ lex-whitespace))
+ (lex-tag 'close (lex-any (lex-string ">")
+ (lex-string "/>")
+ (lex-string "?>")))))
+
+(define lex-close-tag
+ (lex-all (lex-tag 'open (lex-string "</"))
+ lex-element-name
+ (lex-tag 'close (lex-string ">"))))
+
+(define lex-entity
+ (lex-tag 'entity (lex-delimited "&" #:until ";")))
+
+(define lex-text
+ (lex-char-set (char-set-difference char-set:full
+ (char-set #\< #\&))))
+
+(define lex-whitespace-maybe
+ (lex-maybe lex-whitespace))
+
+(define lex-xml-element
+ (lex-tag 'element lex-xml-symbol))
+
+(define lex-xml-attribute
+ (lex-all (lex-tag 'attribute lex-xml-symbol)
+ lex-whitespace-maybe
+ (lex-string "=")
+ lex-whitespace-maybe
+ (lex-tag 'string (lex-delimited "\""))))
+
+(define lex-xml
+ (lex-consume
+ (lex-any lex-comment
+ lex-close-tag
+ lex-open-tag
+ lex-entity
+ lex-text)))
+
+(lex-xml empty-tokens (string->cursor "<foo bar=\"baz\">quux &copy; </foo>"))