From: Chris Hanson Date: Thu, 13 Apr 2000 16:40:23 +0000 (+0000) Subject: Change rexp compiler to be case sensitive, and add REXP-CASE-FOLD to X-Git-Tag: 20090517-FFI~4032 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=0a8f139df0f4a1727302190f91675848f12e50f2;p=mit-scheme.git Change rexp compiler to be case sensitive, and add REXP-CASE-FOLD to delimit regions that should be case insensitive. --- diff --git a/v7/src/imail/imail-imap-url.scm b/v7/src/imail/imail-imap-url.scm index f877ccd83..069e4fd81 100644 --- a/v7/src/imail/imail-imap-url.scm +++ b/v7/src/imail/imail-imap-url.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: imail-imap-url.scm,v 1.2 2000/04/12 03:56:33 cph Exp $ +;;; $Id: imail-imap-url.scm,v 1.3 2000/04/13 16:40:23 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -87,8 +87,8 @@ ;;;; Matcher language -(define (regexp-matcher pattern) - (let ((pattern (re-compile-pattern pattern #f))) +(define (rexp-matcher pattern) + (let ((pattern (rexp-compile pattern))) (lambda (string start end) (let ((regs (re-substring-match pattern string start end))) (and regs @@ -135,21 +135,16 @@ (char-set #\return #\linefeed))) (define imap:match-atom - (regexp-matcher - (string-append (char-set->regexp imap:char-set:atom-char) - "+"))) + (rexp-matcher (rexp+ imap:char-set:atom-char))) (define imap:match-quoted-string - (regexp-matcher - (string-append - "\"" - (regexp-group (char-set->regexp - (char-set-difference imap:char-set:text-char - imap:char-set:quoted-specials)) - (string-append - "\\\\" - (char-set->regexp imap:char-set:quoted-specials))) - "*\""))) + (rexp-matcher + (rexp-sequence "\"" + (rexp* (rexp-alternatives + (char-set-difference imap:char-set:text-char + imap:char-set:quoted-specials) + (rexp-sequence "\\" imap:char-set:quoted-specials))) + "\""))) (define (imap:match-literal string start end) (let ((regs (re-substring-match "{\\([0-9]+\\)}\r\n" string start end))) @@ -170,74 +165,60 @@ (or (imap:match-quoted-string string start end) (imap:match-literal string start end))) -(define imap:char-set:achar - (char-set-union url:char-set:unreserved - (string->char-set "&=~"))) - -(define imap:regexp:achar - (regexp-group (char-set->regexp imap:char-set:achar) - url:regexp:escape)) - -(define imap:regexp:achar+ - (string-append imap:regexp:achar "+")) - -(define imap:char-set:bchar - (char-set-union imap:char-set:achar - (string->char-set ":@/"))) - -(define imap:regexp:bchar - (regexp-group (char-set->regexp imap:char-set:bchar) - url:regexp:escape)) - -(define imap:regexp:bchar+ - (string-append imap:regexp:bchar "+")) - -(define imap:regexp:enc-auth-type imap:regexp:achar+) -(define imap:regexp:enc-list-mailbox imap:regexp:bchar+) -(define imap:regexp:enc-mailbox imap:regexp:bchar+) -(define imap:regexp:enc-search imap:regexp:bchar+) -(define imap:regexp:enc-section imap:regexp:bchar+) -(define imap:regexp:enc-user imap:regexp:achar+) - -(define imap:regexp:iauth - (string-append ";AUTH=" (regexp-group "\\*" imap:regexp:enc-auth-type))) - -(define (regexp-optional regexp) - (string-append (regexp-group regexp) "?")) - -(define imap:regexp:iuserauth - (regexp-group (string-append imap:regexp:enc-user - (regexp-optional imap:regexp:iauth)) - (string-append (regexp-optional imap:regexp:enc-user) - imap:regexp:iauth))) - -(define imap:regexp:iserver - (string-append (regexp-optional (string-append imap:regexp:iuserauth "@")) - url:regexp:hostport)) - -(define imap:regexp:imailboxlist - (string-append (regexp-optional imap:regexp:enc-list-mailbox) +(define imap:rexp:achar+ + (rexp+ (rexp-alternatives (char-set-union url:char-set:unreserved + (string->char-set "&=~")) + url:rexp:escape))) + +(define imap:rexp:bchar+ + (rexp+ (rexp-alternatives (char-set-union imap:char-set:achar + (string->char-set ":@/")) + url:rexp:escape))) + +(define imap:rexp:enc-auth-type imap:rexp:achar+) +(define imap:rexp:enc-list-mailbox imap:rexp:bchar+) +(define imap:rexp:enc-mailbox imap:rexp:bchar+) +(define imap:rexp:enc-search imap:rexp:bchar+) +(define imap:rexp:enc-section imap:rexp:bchar+) +(define imap:rexp:enc-user imap:rexp:achar+) + +(define imap:rexp:iauth + (rexp-sequence ";AUTH=" (regexp-alternatives "*" imap:rexp:enc-auth-type))) + +(define imap:rexp:iuserauth + (rexp-alternatives (rexp-sequence imap:rexp:enc-user + (rexp-optional imap:rexp:iauth)) + (rexp-sequence (rexp-optional imap:rexp:enc-user) + imap:rexp:iauth))) + +(define imap:rexp:iserver + (rexp-sequence (rexp-optional (rexp-sequence imap:rexp:iuserauth "@")) + url:rexp:hostport)) + +(define imap:rexp:imailboxlist + (rexp-sequence (rexp-optional imap:rexp:enc-list-mailbox) ";TYPE=" - (regexp-group "LIST" "LSUB"))) + (rexp-alternatives "LIST" "LSUB"))) -(define imap:regexp:nz-number - "[1-9][0-9]*") +(define imap:rexp:nz-number + (rexp-sequence (char-set-difference char-set:numeric (char-set #\0)) + (rexp* char-set:numeric))) -(define imap:regexp:uidvalidity - (string-append ";UIDVALIDITY=" imap:regexp:nz-number)) +(define imap:rexp:uidvalidity + (rexp-sequence ";UIDVALIDITY=" imap:rexp:nz-number)) -(define imap:regexp:iuid - (string-append ";UID=" imap:regexp:nz-number)) +(define imap:rexp:iuid + (rexp-sequence ";UID=" imap:rexp:nz-number)) -(define imap:regexp:imessagelist - (string-append imap:regexp:enc-mailbox - (regexp-optional (string-append "\\?" imap:regexp:enc-search)) - (regexp-optional imap:regexp:uidvalidity))) +(define imap:rexp:imessagelist + (rexp-sequence imap:rexp:enc-mailbox + (rexp-optional (rexp-sequence "?" imap:rexp:enc-search)) + (rexp-optional imap:rexp:uidvalidity))) -(define imap:regexp:imessagepart - (string-append imap:regexp:enc-mailbox - (regexp-optional imap:regexp:uidvalidity) - imap:regexp:iuid - (regexp-optional - (string-append "/;SECTION=" imap:regexp:enc-section)))) +(define imap:rexp:imessagepart + (rexp-sequence imap:rexp:enc-mailbox + (rexp-optional imap:rexp:uidvalidity) + imap:rexp:iuid + (rexp-optional + (rexp-sequence "/;SECTION=" imap:rexp:enc-section)))) \ No newline at end of file diff --git a/v7/src/imail/imail.pkg b/v7/src/imail/imail.pkg index 489f4c6e3..cbdaedf7c 100644 --- a/v7/src/imail/imail.pkg +++ b/v7/src/imail/imail.pkg @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: imail.pkg,v 1.9 2000/04/13 15:59:32 cph Exp $ +;;; $Id: imail.pkg,v 1.10 2000/04/13 16:40:11 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -33,6 +33,7 @@ rexp->regexp rexp-alternatives rexp-any-char + rexp-case-fold rexp-compile rexp-group rexp-line-end diff --git a/v7/src/imail/rexp.scm b/v7/src/imail/rexp.scm index 291c814e6..998535f74 100644 --- a/v7/src/imail/rexp.scm +++ b/v7/src/imail/rexp.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: rexp.scm,v 1.6 2000/04/13 16:23:50 cph Exp $ +;;; $Id: rexp.scm,v 1.7 2000/04/13 16:40:04 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -24,7 +24,6 @@ (define (rexp? object) (or (string? rexp) - (char? rexp) (char-set? rexp) (and (pair? rexp) (list? (cdr rexp)) @@ -93,13 +92,24 @@ (define (rexp-syntax-char type) `(SYNTAX-CHAR ,type)) (define (rexp-not-syntax-char type) `(NOT-SYNTAX-CHAR ,type)) +(define (rexp-case-fold rexp) + (let ((lose (lambda () (error "Malformed rexp:" rexp)))) + (cond ((string? rexp) + `(CASE-FOLD rexp)) + ((and (pair? rexp) + (memq (car rexp) '(ALTERNATIVES SEQUENCE GROUP OPTIONAL * +)) + (list? (cdr rexp))) + (cons (car rexp) + (map rexp-case-fold (cdr rexp)))) + (else rexp)))) + (define (rexp-groupify rexp) (let ((lose (lambda () (error "Malformed rexp:" rexp)))) (cond ((string? rexp) (if (fix:= 1 (string-length rexp)) rexp (rexp-group rexp))) - ((or (char? rexp) (char-set? rexp)) + ((char-set? rexp) rexp) ((pair? rexp) (cond ((memq (car rexp) grouped-rexp-types) @@ -122,15 +132,13 @@ '(LINE-START LINE-END STRING-START STRING-END WORD-EDGE NOT-WORD-EDGE WORD-START WORD-END)) -(define (rexp-compile rexp case-fold?) - (re-compile-pattern (rexp->regexp rexp) case-fold?)) +(define (rexp-compile rexp) + (re-compile-pattern (rexp->regexp rexp) #f)) (define (rexp->regexp rexp) (let ((lose (lambda () (error "Malformed rexp:" rexp)))) (cond ((string? rexp) (re-quote-string rexp)) - ((char? rexp) - (re-quote-string (string rexp))) ((char-set? rexp) (char-set->regexp rexp)) ((and (pair? rexp) (list? (cdr rexp))) @@ -154,6 +162,11 @@ ((OPTIONAL) (string-append (rexp-arg) "?")) ((*) (string-append (rexp-arg) "*")) ((+) (string-append (rexp-arg) "+")) + ((CASE-FOLD) + (let ((arg (one-arg))) + (if (string? arg) + (case-fold-string arg) + (lose)))) ((ANY-CHAR) ".") ((LINE-START) "^") ((LINE-END) "$") @@ -169,6 +182,24 @@ ((NOT-SYNTAX-CHAR) (string-append "\\S" (syntax-type))) (else (lose)))))) (else (lose))))) + +(define (case-fold-string s) + (let ((end (string-length s))) + (let loop ((start 0) (parts '())) + (let ((index + (substring-find-next-char-in-set s start end + char-set:alphabetic))) + (if index + (loop (fix:+ index 1) + (cons* (let ((char (string-ref s index))) + (string-append "[" + (string (char-upcase char)) + (string (char-downcase char)) + "]")) + (re-quote-string + (substring s start index)) + parts)) + (apply string-append (reverse! parts))))))) (define (separated-append tokens separator) (cond ((not (pair? tokens)) "") diff --git a/v7/src/imail/url.scm b/v7/src/imail/url.scm index a1f0f5997..9b2f952f2 100644 --- a/v7/src/imail/url.scm +++ b/v7/src/imail/url.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: url.scm,v 1.4 2000/04/13 15:59:26 cph Exp $ +;;; $Id: url.scm,v 1.5 2000/04/13 16:40:17 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -81,7 +81,7 @@ (url:decode-substring string 0 (string-length string))) (define url:substring-encoded? - (let ((pattern (rexp-compile-pattern url:rexp:xchar #f))) + (let ((pattern (rexp-compile url:rexp:xchar))) (lambda (string start end) (let ((regs (re-substring-match pattern string start end))) (and regs @@ -123,7 +123,7 @@ encoded)))) (define (url:decode-substring string start end) - (let ((patt (rexp-compile url:rexp:escape #f))) + (let ((patt (rexp-compile url:rexp:escape))) (let ((n-encoded (let loop ((start start) (n-encoded 0)) (let ((regs (re-substring-search-forward patt string start end))) diff --git a/v7/src/runtime/rexp.scm b/v7/src/runtime/rexp.scm index 291c814e6..998535f74 100644 --- a/v7/src/runtime/rexp.scm +++ b/v7/src/runtime/rexp.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: rexp.scm,v 1.6 2000/04/13 16:23:50 cph Exp $ +;;; $Id: rexp.scm,v 1.7 2000/04/13 16:40:04 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -24,7 +24,6 @@ (define (rexp? object) (or (string? rexp) - (char? rexp) (char-set? rexp) (and (pair? rexp) (list? (cdr rexp)) @@ -93,13 +92,24 @@ (define (rexp-syntax-char type) `(SYNTAX-CHAR ,type)) (define (rexp-not-syntax-char type) `(NOT-SYNTAX-CHAR ,type)) +(define (rexp-case-fold rexp) + (let ((lose (lambda () (error "Malformed rexp:" rexp)))) + (cond ((string? rexp) + `(CASE-FOLD rexp)) + ((and (pair? rexp) + (memq (car rexp) '(ALTERNATIVES SEQUENCE GROUP OPTIONAL * +)) + (list? (cdr rexp))) + (cons (car rexp) + (map rexp-case-fold (cdr rexp)))) + (else rexp)))) + (define (rexp-groupify rexp) (let ((lose (lambda () (error "Malformed rexp:" rexp)))) (cond ((string? rexp) (if (fix:= 1 (string-length rexp)) rexp (rexp-group rexp))) - ((or (char? rexp) (char-set? rexp)) + ((char-set? rexp) rexp) ((pair? rexp) (cond ((memq (car rexp) grouped-rexp-types) @@ -122,15 +132,13 @@ '(LINE-START LINE-END STRING-START STRING-END WORD-EDGE NOT-WORD-EDGE WORD-START WORD-END)) -(define (rexp-compile rexp case-fold?) - (re-compile-pattern (rexp->regexp rexp) case-fold?)) +(define (rexp-compile rexp) + (re-compile-pattern (rexp->regexp rexp) #f)) (define (rexp->regexp rexp) (let ((lose (lambda () (error "Malformed rexp:" rexp)))) (cond ((string? rexp) (re-quote-string rexp)) - ((char? rexp) - (re-quote-string (string rexp))) ((char-set? rexp) (char-set->regexp rexp)) ((and (pair? rexp) (list? (cdr rexp))) @@ -154,6 +162,11 @@ ((OPTIONAL) (string-append (rexp-arg) "?")) ((*) (string-append (rexp-arg) "*")) ((+) (string-append (rexp-arg) "+")) + ((CASE-FOLD) + (let ((arg (one-arg))) + (if (string? arg) + (case-fold-string arg) + (lose)))) ((ANY-CHAR) ".") ((LINE-START) "^") ((LINE-END) "$") @@ -169,6 +182,24 @@ ((NOT-SYNTAX-CHAR) (string-append "\\S" (syntax-type))) (else (lose)))))) (else (lose))))) + +(define (case-fold-string s) + (let ((end (string-length s))) + (let loop ((start 0) (parts '())) + (let ((index + (substring-find-next-char-in-set s start end + char-set:alphabetic))) + (if index + (loop (fix:+ index 1) + (cons* (let ((char (string-ref s index))) + (string-append "[" + (string (char-upcase char)) + (string (char-downcase char)) + "]")) + (re-quote-string + (substring s start index)) + parts)) + (apply string-append (reverse! parts))))))) (define (separated-append tokens separator) (cond ((not (pair? tokens)) "") diff --git a/v7/src/runtime/url.scm b/v7/src/runtime/url.scm index a1f0f5997..9b2f952f2 100644 --- a/v7/src/runtime/url.scm +++ b/v7/src/runtime/url.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: url.scm,v 1.4 2000/04/13 15:59:26 cph Exp $ +;;; $Id: url.scm,v 1.5 2000/04/13 16:40:17 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -81,7 +81,7 @@ (url:decode-substring string 0 (string-length string))) (define url:substring-encoded? - (let ((pattern (rexp-compile-pattern url:rexp:xchar #f))) + (let ((pattern (rexp-compile url:rexp:xchar))) (lambda (string start end) (let ((regs (re-substring-match pattern string start end))) (and regs @@ -123,7 +123,7 @@ encoded)))) (define (url:decode-substring string start end) - (let ((patt (rexp-compile url:rexp:escape #f))) + (let ((patt (rexp-compile url:rexp:escape))) (let ((n-encoded (let loop ((start start) (n-encoded 0)) (let ((regs (re-substring-search-forward patt string start end)))