From b2a796eb98638e2710a4f6ae687ca3757f7a3e8a Mon Sep 17 00:00:00 2001 From: David Thompson Date: Tue, 16 Feb 2016 12:45:26 -0500 Subject: Rewrite highlighters in terms of lexers. --- syntax-highlight.scm | 46 +++++++------ syntax-highlight/scheme.scm | 156 ++++++++++++++++++-------------------------- syntax-highlight/xml.scm | 150 ++++++++++++++++++++---------------------- 3 files changed, 158 insertions(+), 194 deletions(-) diff --git a/syntax-highlight.scm b/syntax-highlight.scm index 091eed2..4326a4c 100644 --- a/syntax-highlight.scm +++ b/syntax-highlight.scm @@ -23,36 +23,34 @@ (define-module (syntax-highlight) #:use-module (ice-9 match) + #:use-module (ice-9 rdelim) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (srfi srfi-26) - #:use-module (srfi srfi-41) - #:use-module (syntax-highlight utils) + #:use-module (syntax-highlight lexers) #:export (highlight highlights->sxml)) -(define* (highlight highlighter #:optional (stream (current-input-port))) - "Apply HIGHLIGHTER, a syntax highlighting procedure, to STREAM. -STREAM may be an open port, string, or SRFI-41 character stream. If -STREAM is not specified, characters are read from the current input -port." - (let-values (((result remaining) - (highlighter (cond - ((port? stream) - (port->stream stream)) - ((string? stream) - (string->stream stream)) - ((stream? stream) - stream) - (else - (error "Cannot convert to stream: " stream)))))) - ;; If there's a remainder, it means that parsing failed. We want - ;; to preserve *all* of the input text, even if it is invalid. - ;; So, we take the stuff that could be parsed and tack on the - ;; stuff that wasn't. - (if (stream-null? remaining) - result - (append result (list (stream->string remaining)))))) +(define (object->string obj) + (cond + ((input-port? obj) + (read-string obj)) + ((string? obj) + obj) + (else + (error "not an input port or string: " + obj)))) + +(define object->cursor + (compose string->cursor object->string)) + +(define* (highlight lexer #:optional (string-or-port (current-input-port))) + "Apply LEXER to STRING-OR-PORT, a string or an open input port. If +STRING-OR-PORT is not specified, characters are read from the current +input port." + (let-values (((result remainder) + (lexer empty-tokens (object->cursor string-or-port)))) + (and result (tokens->list result)))) (define (highlights->sxml highlights) "Convert HIGHLIGHTS, a list of syntax highlighting expressions, into diff --git a/syntax-highlight/scheme.scm b/syntax-highlight/scheme.scm index 5b906c7..2571e29 100644 --- a/syntax-highlight/scheme.scm +++ b/syntax-highlight/scheme.scm @@ -22,113 +22,87 @@ ;;; Code: (define-module (syntax-highlight scheme) + #:use-module (ice-9 match) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) - #:use-module (srfi srfi-41) - #:use-module (syntax-highlight parsers) + #:use-module (srfi srfi-26) + #:use-module (syntax-highlight lexers) #:export (%default-special-symbols %default-special-regexps - make-scheme-highlighter - scheme-highlighter)) + make-scheme-lexer + lex-scheme)) (define char-set:lisp-delimiters (char-set-union char-set:whitespace (char-set #\( #\) #\[ #\] #\{ #\}))) -(define (lisp-delimiter? char) - (char-set-contains? char-set:lisp-delimiters char)) - -(define parse-symbol-chars - (parse-char-set - (char-set-complement char-set:lisp-delimiters))) - -(define (parse-specials special-words) - "Create a parser for SPECIAL-WORDS, a list of important terms for a -language." - (define (special word) - (let ((parser (tagged-parser 'special (parse-string word)))) - (lambda (stream) - (let-values (((result rest-of-stream) (parser stream))) - (if (and result (lisp-delimiter? (stream-car stream))) - (values result rest-of-stream) - (parse-fail stream)))))) - - (fold parse-either parse-fail (map special special-words))) - -(define (parse-specials/regexp special-regexps) - (let ((merged-regexp - (string-join (map (lambda (regexp) - (string-append "(" regexp ")")) - special-regexps) - "|"))) - (tagged-parser 'special - (parse-regexp merged-regexp parse-symbol-chars)))) - -(define (parse-openers openers) - (define (open opener) - (tagged-parser 'open (parse-string opener))) - - (fold parse-either parse-fail (map open openers))) - -(define (parse-closers closers) - (define (close closer) - (tagged-parser 'close (parse-string closer))) - - (fold parse-either parse-fail (map close closers))) - -(define parse-symbol - (tagged-parser 'symbol - (parse-char-set - (char-set-complement char-set:lisp-delimiters)))) - -(define parse-keyword - (tagged-parser 'keyword - (parse-map string-concatenate - (parse-each (parse-string "#:") - parse-symbol-chars)))) - -(define parse-string-literal - (tagged-parser 'string (parse-delimited "\""))) - -(define parse-comment - (tagged-parser 'comment (parse-delimited ";" #:until "\n"))) - -(define parse-quoted-symbol - (tagged-parser 'symbol (parse-delimited "#{" #:until "}#"))) +(define char-set:lisp-symbol + (char-set-complement char-set:lisp-delimiters)) (define %default-special-symbols - '("define" "begin" "call-with-current-continuation" "call/cc" + '("begin" "call-with-current-continuation" "call/cc" "call-with-input-file" "call-with-output-file" "case" "cond" "do" "else" "if" "lambda" "λ" "let" "let*" "let-syntax" "letrec" "letrec-syntax" - "export" "import" "library" "define-module" "use-module" "let-values" "let*-values" + "export" "import" "library" "use-module" "and" "or" "delay" "force" - "map" "for-each" - "syntax" "syntax-rules")) - -(define %default-special-regexps - '("^define")) - -(define* (make-scheme-highlighter special-symbols special-regexps) - "Create a syntax highlighting procedure for Scheme that associates -the 'special' tag for symbols appearing in the list SPECIAL-SYMBOLS or -matching a regular expression in SPECIAL-REGEXPS." - (parse-many - (parse-any parse-whitespace - (parse-openers '("(" "[" "{")) - (parse-closers '(")" "]" "}")) - (parse-specials special-symbols) - (parse-specials/regexp special-regexps) - parse-string-literal - parse-comment - parse-keyword - parse-quoted-symbol - parse-symbol))) - -(define scheme-highlighter - (make-scheme-highlighter %default-special-symbols - %default-special-regexps)) + "map" "for-each")) + +(define %default-special-prefixes + '("define" "syntax")) + +(define (lex-special-symbol sym) + (lex-filter (lambda (str) + (string=? sym str)) + (lex-char-set char-set:lisp-symbol))) + +(define (lex-special-symbol symbols prefixes) + (lex-filter (lambda (str) + (or (any (cut string=? symbols <>) symbols) + (any (cut string-prefix? <> str) prefixes))) + (lex-char-set char-set:lisp-symbol))) + +(define (lex-map2 proc lexer) + (lambda (tokens cursor) + (let-values (((result remainder) (lexer tokens cursor))) + (if result + (match (token-take result 2) + ((second first) ; accumulator tokens are in reverse order + (values (token-add (token-drop result 2) + (proc first second)) + remainder))) + (fail))))) + +(define (make-scheme-lexer special-symbols special-prefixes) + "Return a lexer that highlights Scheme source code. Tag strings +that are in SPECIAL-SYMBOLS or match one of the string prefixes in +SPECIAL-PREFIXES with the 'special' tag." + (lex-consume + (lex-any (lex-char-set char-set:whitespace) + (lex-tag 'open (lex-any* (map lex-string '("(" "[" "{")))) + (lex-tag 'close (lex-any* (map lex-string '(")" "]" "}")))) + (lex-tag 'comment (lex-delimited ";" #:until "\n")) + (lex-tag 'multi-line-comment + (lex-delimited "#|" #:until "|#" #:nested? #t)) + (lex-tag 'special + (lex-filter (lambda (str) + (or (any (cut string=? <> str) + special-symbols) + (any (cut string-prefix? <> str) + special-prefixes))) + (lex-char-set char-set:lisp-symbol))) + (lex-tag 'string (lex-delimited "\"")) + (lex-tag 'keyword + (lex-map2 string-append + (lex-all (lex-string "#:") + (lex-char-set char-set:lisp-symbol)))) + (lex-tag 'symbol + (lex-any (lex-delimited "#{" #:until "}#") + (lex-char-set char-set:lisp-symbol)))))) + +(define lex-scheme + (make-scheme-lexer %default-special-symbols %default-special-prefixes)) diff --git a/syntax-highlight/xml.scm b/syntax-highlight/xml.scm index 87b661b..d91a8e5 100644 --- a/syntax-highlight/xml.scm +++ b/syntax-highlight/xml.scm @@ -26,86 +26,78 @@ #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (srfi srfi-41) - #:use-module (syntax-highlight parsers) - #:export (xml-highlighter)) - -(define (flatten+compact highlights) - (define tagged? - (match-lambda - (((? symbol?) _) #t) - (_ #f))) - - (let loop ((highlights highlights) - (result '())) - (match highlights - (() (reverse result)) - (("" . tail) - (loop tail result)) - (((or (? string? head) (? tagged? head)) . tail) - (loop tail (cons head result))) - ((head . tail) - (loop tail (append (reverse (flatten+compact head)) result)))))) - -(define parse-comment - (tagged-parser 'comment (parse-delimited ""))) - -(define parse-xml-symbol - (parse-char-set - (char-set-union char-set:letter+digit - (char-set #\. #\- #\_ #\:)))) - -(define parse-element-name - (tagged-parser 'element parse-xml-symbol)) - -(define parse-whitespace-maybe - (parse-maybe parse-whitespace)) - -(define parse-attribute - (parse-each (tagged-parser 'attribute parse-xml-symbol) - parse-whitespace-maybe - (parse-string "=") - parse-whitespace-maybe - (tagged-parser 'string (parse-delimited "\"")))) - -(define parse-open-tag - (parse-each (tagged-parser 'open (parse-any (parse-string "") - (parse-string "/>") - (parse-string "?>"))))) - -(define parse-close-tag - (parse-each (tagged-parser 'open (parse-string "")))) + #:use-module (syntax-highlight lexers) + #:export (lex-xml)) (define char-set:not-whitespace (char-set-complement char-set:whitespace)) -(define parse-tag - (parse-each (parse-string "<") - (parse-char-set - (char-set-delete char-set:not-whitespace #\>)) - (parse-string ">"))) - -(define parse-entity - (tagged-parser 'entity (parse-delimited "&" #:until ";"))) - -(define parse-text - (parse-char-set - (char-set-difference char-set:full (char-set #\<)))) - -(define xml-highlighter - (parse-map flatten+compact - (parse-many - (parse-any parse-comment - parse-close-tag - parse-open-tag - parse-entity - parse-text)))) +(define char-set:xml-symbol + (char-set-union char-set:letter+digit + (char-set #\. #\- #\_ #\:))) + +(define lex-comment + (lex-tag 'comment (lex-delimited ""))) + +(define lex-xml-symbol + (lex-char-set char-set:xml-symbol)) + +(define lex-element-name + (lex-tag 'element lex-xml-symbol)) + +(define lex-whitespace-maybe + (lex-maybe lex-whitespace)) + +(define lex-attribute + (lex-all (lex-tag 'attribute lex-xml-symbol) + lex-whitespace-maybe + (lex-string "=") + lex-whitespace-maybe + (lex-tag 'string (lex-delimited "\"")))) + +(define lex-open-tag + (lex-all (lex-tag 'open (lex-any (lex-string "") + (lex-string "/>") + (lex-string "?>"))))) + +(define lex-close-tag + (lex-all (lex-tag 'open (lex-string "")))) + +(define lex-entity + (lex-tag 'entity (lex-delimited "&" #:until ";"))) + +(define lex-text + (lex-char-set (char-set-difference char-set:full + (char-set #\< #\&)))) + +(define lex-whitespace-maybe + (lex-maybe lex-whitespace)) + +(define lex-xml-element + (lex-tag 'element lex-xml-symbol)) + +(define lex-xml-attribute + (lex-all (lex-tag 'attribute lex-xml-symbol) + lex-whitespace-maybe + (lex-string "=") + lex-whitespace-maybe + (lex-tag 'string (lex-delimited "\"")))) + +(define lex-xml + (lex-consume + (lex-any lex-comment + lex-close-tag + lex-open-tag + lex-entity + lex-text))) + +(lex-xml empty-tokens (string->cursor "quux © ")) -- cgit v1.2.3