From: Chris Hanson Date: Wed, 12 Apr 2000 03:08:15 +0000 (+0000) Subject: Reconcile the character sets and regexps used here with the X-Git-Tag: 20090517-FFI~4046 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=193bcc03ea4e8cc73805eb0c230e675ecf1f45cd;p=mit-scheme.git Reconcile the character sets and regexps used here with the descriptions in RFC 1738. Also export them since the IMAP URL code needs access to some of them. --- diff --git a/v7/src/imail/imail.pkg b/v7/src/imail/imail.pkg index 7878062d4..a06853ba4 100644 --- a/v7/src/imail/imail.pkg +++ b/v7/src/imail/imail.pkg @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: imail.pkg,v 1.3 2000/04/12 02:25:56 cph Exp $ +;;; $Id: imail.pkg,v 1.4 2000/04/12 03:08:15 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -41,9 +41,20 @@ (files "url") (parent (edwin)) (export (edwin) + url:char-set:escaped + url:char-set:extra + url:char-set:national + url:char-set:punctuation + url:char-set:reserved + url:char-set:safe + url:char-set:unescaped + url:char-set:unreserved url:decode-string url:decode-substring url:encode-string url:encode-substring + url:regexp:escape + url:regexp:uchar + url:regexp:xchar url:string-encoded? url:substring-encoded?)) \ No newline at end of file diff --git a/v7/src/imail/url.scm b/v7/src/imail/url.scm index 71408a39e..5f6853cae 100644 --- a/v7/src/imail/url.scm +++ b/v7/src/imail/url.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: url.scm,v 1.1 2000/04/12 02:26:05 cph Exp $ +;;; $Id: url.scm,v 1.2 2000/04/12 03:08:11 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -22,17 +22,35 @@ (declare (usual-integrations)) -(define url:char-set:safe - (char-set-difference - char-set:graphic - (char-set #\space #\< #\> #\" #\# #\% #\{ #\} #\| #\\ #\^ #\~ #\[ #\] #\`))) +(define url:char-set:safe (string->char-set "$-_.+")) +(define url:char-set:extra (string->char-set "!*'(),")) +(define url:char-set:national (string->char-set "{}|\^~[]`")) +(define url:char-set:punctuation (string->char-set "<>#%\"")) +(define url:char-set:reserved (string->char-set ";/?:@&=")) -(define url:char-set:must-encode - (char-set-invert url:char-set:safe)) +(define url:char-set:unreserved + (char-set-union char-set:alphanumeric + url:char-set:safe + url:char-set:extra)) -(define url:encoded-char-regexp +(define url:char-set:unescaped + (char-set-union url:char-set:unreserved + url:char-set:reserved)) + +(define url:char-set:escaped + (char-set-invert url:char-set:unescaped)) + +(define url:regexp:escape "%[0-9A-Fa-f][0-9A-Fa-f]") +(define url:regexp:uchar + (regexp-group (char-set->regexp url:char-set:unreserved) + url:regexp:escape)) + +(define url:regexp:xchar + (regexp-group (char-set->regexp url:char-set:unescaped) + url:regexp:escape)) + (define (url:string-encoded? string) (url:substring-encoded? string 0 (string-length string))) @@ -46,9 +64,8 @@ (let ((pattern (re-compile-pattern (string-append - (regexp-group - (char-set->regexp-char-range url:char-set:safe) - url:encoded-char-regexp) + (regexp-group (char-set->regexp url:char-set:unescaped) + url:regexp:escape) "*") #f))) (lambda (string start end) @@ -61,7 +78,7 @@ (let loop ((start start) (n-to-encode 0)) (let ((index (substring-find-next-char-in-set string start end - url:char-set:must-encode))) + url:char-set:escaped))) (if index (loop (fix:+ index 1) (fix:+ n-to-encode 1)) n-to-encode))))) @@ -73,7 +90,7 @@ (let loop ((start start) (i 0)) (let ((index (substring-find-next-char-in-set string start end - url:char-set:must-encode))) + url:char-set:escaped))) (if index (begin (substring-move-left! string start index encoded i) @@ -95,7 +112,7 @@ (let ((n-encoded (let loop ((start start) (n-encoded 0)) (let ((regs - (re-substring-search-forward url:encoded-char-regexp + (re-substring-search-forward url:regexp:escape string start end))) (if regs (loop (re-match-end-index 0 regs) (fix:+ n-encoded 1)) @@ -106,7 +123,7 @@ (make-string (fix:- (fix:- end start) (fix:* 2 n-encoded))))) (let loop ((start start) (i 0)) (let ((regs - (re-substring-search-forward url:encoded-char-regexp + (re-substring-search-forward url:regexp:escape string start end))) (if regs (let ((index (re-match-start-index 0 regs))) diff --git a/v7/src/runtime/url.scm b/v7/src/runtime/url.scm index 71408a39e..5f6853cae 100644 --- a/v7/src/runtime/url.scm +++ b/v7/src/runtime/url.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: url.scm,v 1.1 2000/04/12 02:26:05 cph Exp $ +;;; $Id: url.scm,v 1.2 2000/04/12 03:08:11 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -22,17 +22,35 @@ (declare (usual-integrations)) -(define url:char-set:safe - (char-set-difference - char-set:graphic - (char-set #\space #\< #\> #\" #\# #\% #\{ #\} #\| #\\ #\^ #\~ #\[ #\] #\`))) +(define url:char-set:safe (string->char-set "$-_.+")) +(define url:char-set:extra (string->char-set "!*'(),")) +(define url:char-set:national (string->char-set "{}|\^~[]`")) +(define url:char-set:punctuation (string->char-set "<>#%\"")) +(define url:char-set:reserved (string->char-set ";/?:@&=")) -(define url:char-set:must-encode - (char-set-invert url:char-set:safe)) +(define url:char-set:unreserved + (char-set-union char-set:alphanumeric + url:char-set:safe + url:char-set:extra)) -(define url:encoded-char-regexp +(define url:char-set:unescaped + (char-set-union url:char-set:unreserved + url:char-set:reserved)) + +(define url:char-set:escaped + (char-set-invert url:char-set:unescaped)) + +(define url:regexp:escape "%[0-9A-Fa-f][0-9A-Fa-f]") +(define url:regexp:uchar + (regexp-group (char-set->regexp url:char-set:unreserved) + url:regexp:escape)) + +(define url:regexp:xchar + (regexp-group (char-set->regexp url:char-set:unescaped) + url:regexp:escape)) + (define (url:string-encoded? string) (url:substring-encoded? string 0 (string-length string))) @@ -46,9 +64,8 @@ (let ((pattern (re-compile-pattern (string-append - (regexp-group - (char-set->regexp-char-range url:char-set:safe) - url:encoded-char-regexp) + (regexp-group (char-set->regexp url:char-set:unescaped) + url:regexp:escape) "*") #f))) (lambda (string start end) @@ -61,7 +78,7 @@ (let loop ((start start) (n-to-encode 0)) (let ((index (substring-find-next-char-in-set string start end - url:char-set:must-encode))) + url:char-set:escaped))) (if index (loop (fix:+ index 1) (fix:+ n-to-encode 1)) n-to-encode))))) @@ -73,7 +90,7 @@ (let loop ((start start) (i 0)) (let ((index (substring-find-next-char-in-set string start end - url:char-set:must-encode))) + url:char-set:escaped))) (if index (begin (substring-move-left! string start index encoded i) @@ -95,7 +112,7 @@ (let ((n-encoded (let loop ((start start) (n-encoded 0)) (let ((regs - (re-substring-search-forward url:encoded-char-regexp + (re-substring-search-forward url:regexp:escape string start end))) (if regs (loop (re-match-end-index 0 regs) (fix:+ n-encoded 1)) @@ -106,7 +123,7 @@ (make-string (fix:- (fix:- end start) (fix:* 2 n-encoded))))) (let loop ((start start) (i 0)) (let ((regs - (re-substring-search-forward url:encoded-char-regexp + (re-substring-search-forward url:regexp:escape string start end))) (if regs (let ((index (re-match-start-index 0 regs)))