From: Chris Hanson Date: Thu, 13 Apr 2000 16:58:40 +0000 (+0000) Subject: Change all one-arg rexp combinators to accept any number of args and X-Git-Tag: 20090517-FFI~4030 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=9bf31d057a0a6f7ca92bb71aab51728949b24c68;p=mit-scheme.git Change all one-arg rexp combinators to accept any number of args and treat it as an implicit sequence. Change sequence combinator to collapse nested sequences. --- diff --git a/v7/src/imail/imail-imap-url.scm b/v7/src/imail/imail-imap-url.scm index 8292b60a1..02139457e 100644 --- a/v7/src/imail/imail-imap-url.scm +++ b/v7/src/imail/imail-imap-url.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: imail-imap-url.scm,v 1.4 2000/04/13 16:42:16 cph Exp $ +;;; $Id: imail-imap-url.scm,v 1.5 2000/04/13 16:58:39 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -193,7 +193,7 @@ imap:rexp:iauth))) (define imap:rexp:iserver - (rexp-sequence (rexp-optional (rexp-sequence imap:rexp:iuserauth "@")) + (rexp-sequence (rexp-optional imap:rexp:iuserauth "@") url:rexp:hostport)) (define imap:rexp:imailboxlist @@ -213,14 +213,12 @@ (define imap:rexp:imessagelist (rexp-sequence imap:rexp:enc-mailbox - (rexp-optional (rexp-sequence "?" imap:rexp:enc-search)) + (rexp-optional "?" imap:rexp:enc-search) (rexp-optional imap:rexp:uidvalidity))) (define imap:rexp:imessagepart (rexp-sequence imap:rexp:enc-mailbox (rexp-optional imap:rexp:uidvalidity) imap:rexp:iuid - (rexp-optional - (rexp-sequence (rexp-case-fold "/;SECTION=") - imap:rexp:enc-section)))) - \ No newline at end of file + (rexp-optional (rexp-case-fold "/;SECTION=") + imap:rexp:enc-section))) \ No newline at end of file diff --git a/v7/src/imail/rexp.scm b/v7/src/imail/rexp.scm index 998535f74..dcabc19fb 100644 --- a/v7/src/imail/rexp.scm +++ b/v7/src/imail/rexp.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: rexp.scm,v 1.7 2000/04/13 16:40:04 cph Exp $ +;;; $Id: rexp.scm,v 1.8 2000/04/13 16:56:49 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -36,17 +36,23 @@ (for-all? (cdr rexp) rexp?)) ((GROUP OPTIONAL * +) (and (one-arg) - (not (and (pair? rexp) - (memq (car rexp) nongroupable-rexp-types))))) + (not (or (and (string? rexp) + (string-null? rexp)) + (and (pair? rexp) + (memq (car rexp) boundary-rexp-types)))))) ((ANY-CHAR LINE-START LINE-END STRING-START STRING-END WORD-EDGE NOT-WORD-EDGE WORD-START WORD-END WORD-CHAR NOT-WORD-CHAR) - #t) + (null? (cdr rexp))) ((SYNTAX-CHAR NOT-SYNTAX-CHAR) (and (one-arg) (assq (cadr rexp) syntax-type-alist))) (else #f)))))) +(define boundary-rexp-types + '(LINE-START LINE-END STRING-START STRING-END WORD-EDGE NOT-WORD-EDGE + WORD-START WORD-END)) + (define syntax-type-alist '((WHITESPACE . " ") (PUNCTUATION . ".") @@ -66,17 +72,51 @@ `(ALTERNATIVES ,@rexps)) (define (rexp-sequence . rexps) - `(SEQUENCE ,@(map (lambda (rexp) - (if (and (pair? rexp) - (eq? (car rexp) 'ALTERNATIVES)) - (rexp-group rexp) - rexp)) - rexps))) + (let ((rexps (simplify-sequence-args rexps))) + (if (pair? rexps) + (if (pair? (cdr rexps)) + `(SEQUENCE ,@rexps) + (car rexps)) + ""))) + +(define (simplify-sequence-args rexps) + (append-map (lambda (rexp) + (cond ((and (string? rexp) (string-null? rexp)) + '()) + ((and (pair? rexp) (eq? 'SEQUENCE (car rexp))) + (cdr rexp)) + ((and (pair? rexp) (eq? 'ALTERNATIVES (car rexp))) + (list (rexp-group rexp))) + (else + (list rexp)))) + rexps)) + +(define (rexp-group . rexps) + `(GROUP ,(apply rexp-sequence rexps))) + +(define (rexp-optional . rexps) + `(OPTIONAL ,(rexp-groupify (apply rexp-sequence rexps)))) + +(define (rexp* . rexps) + `(* ,(rexp-groupify (apply rexp-sequence rexps)))) -(define (rexp-group rexp) `(GROUP ,rexp)) -(define (rexp-optional rexp) `(OPTIONAL ,(rexp-groupify rexp))) -(define (rexp* rexp) `(* ,(rexp-groupify rexp))) -(define (rexp+ rexp) `(+ ,(rexp-groupify rexp))) +(define (rexp+ . rexps) + `(+ ,(rexp-groupify (apply rexp-sequence rexps)))) + +(define (rexp-groupify rexp) + (let ((no-group (lambda () (error "Expression can't be grouped:" rexp)))) + (cond ((string? rexp) + (case (string-length rexp) + ((0) (no-group)) + ((1) rexp) + (else (rexp-group rexp)))) + ((pair? rexp) + (cond ((memq (car rexp) boundary-rexp-types) + (no-group)) + ((memq (car rexp) '(ALTERNATIVES SEQUENCE OPTIONAL * +)) + (rexp-group rexp)) + (else rexp))) + (else rexp)))) (define (rexp-any-char) `(ANY-CHAR)) (define (rexp-line-start) `(LINE-START)) @@ -102,35 +142,6 @@ (cons (car rexp) (map rexp-case-fold (cdr rexp)))) (else rexp)))) - -(define (rexp-groupify rexp) - (let ((lose (lambda () (error "Malformed rexp:" rexp)))) - (cond ((string? rexp) - (if (fix:= 1 (string-length rexp)) - rexp - (rexp-group rexp))) - ((char-set? rexp) - rexp) - ((pair? rexp) - (cond ((memq (car rexp) grouped-rexp-types) - rexp) - ((memq (car rexp) groupable-rexp-types) - (rexp-group rexp)) - ((memq (car rexp) nongroupable-rexp-types) - (error "Expression can't be grouped:" rexp)) - (else - (lose)))) - (else (lose))))) - -(define grouped-rexp-types - '(GROUP ANY-CHAR WORD-CHAR NOT-WORD-CHAR SYNTAX-CHAR NOT-SYNTAX-CHAR)) - -(define groupable-rexp-types - '(ALTERNATIVES SEQUENCE OPTIONAL * +)) - -(define nongroupable-rexp-types - '(LINE-START LINE-END STRING-START STRING-END WORD-EDGE NOT-WORD-EDGE - WORD-START WORD-END)) (define (rexp-compile rexp) (re-compile-pattern (rexp->regexp rexp) #f)) diff --git a/v7/src/imail/url.scm b/v7/src/imail/url.scm index 9b2f952f2..c09fbd1d2 100644 --- a/v7/src/imail/url.scm +++ b/v7/src/imail/url.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: url.scm,v 1.5 2000/04/13 16:40:17 cph Exp $ +;;; $Id: url.scm,v 1.6 2000/04/13 16:58:40 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -52,11 +52,10 @@ (define url:rexp:hostname (let ((tail (rexp-optional - (rexp-sequence - (rexp* - (char-set-union char-set:alphanumeric (string->char-set "-"))) - char-set:alphanumeric)))) - (rexp-sequence (rexp* (rexp-sequence char-set:alphanumeric tail ".")) + (rexp* + (char-set-union char-set:alphanumeric (string->char-set "-"))) + char-set:alphanumeric))) + (rexp-sequence (rexp* char-set:alphanumeric tail ".") char-set:alphabetic tail))) @@ -68,8 +67,7 @@ (rexp-alternatives url:rexp:hostname url:rexp:hostnumber)) (define url:rexp:hostport - (rexp-sequence url:rexp:host - (rexp-optional (rexp-sequence ":" (rexp+ char-set:numeric))))) + (rexp-sequence url:rexp:host (rexp-optional ":" (rexp+ char-set:numeric)))) (define (url:string-encoded? string) (url:substring-encoded? string 0 (string-length string))) diff --git a/v7/src/runtime/rexp.scm b/v7/src/runtime/rexp.scm index 998535f74..dcabc19fb 100644 --- a/v7/src/runtime/rexp.scm +++ b/v7/src/runtime/rexp.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: rexp.scm,v 1.7 2000/04/13 16:40:04 cph Exp $ +;;; $Id: rexp.scm,v 1.8 2000/04/13 16:56:49 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -36,17 +36,23 @@ (for-all? (cdr rexp) rexp?)) ((GROUP OPTIONAL * +) (and (one-arg) - (not (and (pair? rexp) - (memq (car rexp) nongroupable-rexp-types))))) + (not (or (and (string? rexp) + (string-null? rexp)) + (and (pair? rexp) + (memq (car rexp) boundary-rexp-types)))))) ((ANY-CHAR LINE-START LINE-END STRING-START STRING-END WORD-EDGE NOT-WORD-EDGE WORD-START WORD-END WORD-CHAR NOT-WORD-CHAR) - #t) + (null? (cdr rexp))) ((SYNTAX-CHAR NOT-SYNTAX-CHAR) (and (one-arg) (assq (cadr rexp) syntax-type-alist))) (else #f)))))) +(define boundary-rexp-types + '(LINE-START LINE-END STRING-START STRING-END WORD-EDGE NOT-WORD-EDGE + WORD-START WORD-END)) + (define syntax-type-alist '((WHITESPACE . " ") (PUNCTUATION . ".") @@ -66,17 +72,51 @@ `(ALTERNATIVES ,@rexps)) (define (rexp-sequence . rexps) - `(SEQUENCE ,@(map (lambda (rexp) - (if (and (pair? rexp) - (eq? (car rexp) 'ALTERNATIVES)) - (rexp-group rexp) - rexp)) - rexps))) + (let ((rexps (simplify-sequence-args rexps))) + (if (pair? rexps) + (if (pair? (cdr rexps)) + `(SEQUENCE ,@rexps) + (car rexps)) + ""))) + +(define (simplify-sequence-args rexps) + (append-map (lambda (rexp) + (cond ((and (string? rexp) (string-null? rexp)) + '()) + ((and (pair? rexp) (eq? 'SEQUENCE (car rexp))) + (cdr rexp)) + ((and (pair? rexp) (eq? 'ALTERNATIVES (car rexp))) + (list (rexp-group rexp))) + (else + (list rexp)))) + rexps)) + +(define (rexp-group . rexps) + `(GROUP ,(apply rexp-sequence rexps))) + +(define (rexp-optional . rexps) + `(OPTIONAL ,(rexp-groupify (apply rexp-sequence rexps)))) + +(define (rexp* . rexps) + `(* ,(rexp-groupify (apply rexp-sequence rexps)))) -(define (rexp-group rexp) `(GROUP ,rexp)) -(define (rexp-optional rexp) `(OPTIONAL ,(rexp-groupify rexp))) -(define (rexp* rexp) `(* ,(rexp-groupify rexp))) -(define (rexp+ rexp) `(+ ,(rexp-groupify rexp))) +(define (rexp+ . rexps) + `(+ ,(rexp-groupify (apply rexp-sequence rexps)))) + +(define (rexp-groupify rexp) + (let ((no-group (lambda () (error "Expression can't be grouped:" rexp)))) + (cond ((string? rexp) + (case (string-length rexp) + ((0) (no-group)) + ((1) rexp) + (else (rexp-group rexp)))) + ((pair? rexp) + (cond ((memq (car rexp) boundary-rexp-types) + (no-group)) + ((memq (car rexp) '(ALTERNATIVES SEQUENCE OPTIONAL * +)) + (rexp-group rexp)) + (else rexp))) + (else rexp)))) (define (rexp-any-char) `(ANY-CHAR)) (define (rexp-line-start) `(LINE-START)) @@ -102,35 +142,6 @@ (cons (car rexp) (map rexp-case-fold (cdr rexp)))) (else rexp)))) - -(define (rexp-groupify rexp) - (let ((lose (lambda () (error "Malformed rexp:" rexp)))) - (cond ((string? rexp) - (if (fix:= 1 (string-length rexp)) - rexp - (rexp-group rexp))) - ((char-set? rexp) - rexp) - ((pair? rexp) - (cond ((memq (car rexp) grouped-rexp-types) - rexp) - ((memq (car rexp) groupable-rexp-types) - (rexp-group rexp)) - ((memq (car rexp) nongroupable-rexp-types) - (error "Expression can't be grouped:" rexp)) - (else - (lose)))) - (else (lose))))) - -(define grouped-rexp-types - '(GROUP ANY-CHAR WORD-CHAR NOT-WORD-CHAR SYNTAX-CHAR NOT-SYNTAX-CHAR)) - -(define groupable-rexp-types - '(ALTERNATIVES SEQUENCE OPTIONAL * +)) - -(define nongroupable-rexp-types - '(LINE-START LINE-END STRING-START STRING-END WORD-EDGE NOT-WORD-EDGE - WORD-START WORD-END)) (define (rexp-compile rexp) (re-compile-pattern (rexp->regexp rexp) #f)) diff --git a/v7/src/runtime/url.scm b/v7/src/runtime/url.scm index 9b2f952f2..c09fbd1d2 100644 --- a/v7/src/runtime/url.scm +++ b/v7/src/runtime/url.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: url.scm,v 1.5 2000/04/13 16:40:17 cph Exp $ +;;; $Id: url.scm,v 1.6 2000/04/13 16:58:40 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -52,11 +52,10 @@ (define url:rexp:hostname (let ((tail (rexp-optional - (rexp-sequence - (rexp* - (char-set-union char-set:alphanumeric (string->char-set "-"))) - char-set:alphanumeric)))) - (rexp-sequence (rexp* (rexp-sequence char-set:alphanumeric tail ".")) + (rexp* + (char-set-union char-set:alphanumeric (string->char-set "-"))) + char-set:alphanumeric))) + (rexp-sequence (rexp* char-set:alphanumeric tail ".") char-set:alphabetic tail))) @@ -68,8 +67,7 @@ (rexp-alternatives url:rexp:hostname url:rexp:hostnumber)) (define url:rexp:hostport - (rexp-sequence url:rexp:host - (rexp-optional (rexp-sequence ":" (rexp+ char-set:numeric))))) + (rexp-sequence url:rexp:host (rexp-optional ":" (rexp+ char-set:numeric)))) (define (url:string-encoded? string) (url:substring-encoded? string 0 (string-length string)))