From: Chris Hanson <org/chris-hanson/cph>
Date: Thu, 30 Sep 2010 10:13:36 +0000 (-0700)
Subject: Change URI abstraction to handle percent encoding in the authority.
X-Git-Tag: 20101212-Gtk~55^2
X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=25fe5c92fff7da0b9bbd819c558b92dbd568508d;p=mit-scheme.git

Change URI abstraction to handle percent encoding in the authority.
Change terminology slightly to match RFC.
---

diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg
index 47fa4d19b..aacef35b2 100644
--- a/src/runtime/runtime.pkg
+++ b/src/runtime/runtime.pkg
@@ -5455,7 +5455,6 @@ USA.
 	  char-set:uri-fragment
 	  char-set:uri-hex
 	  char-set:uri-ipvfuture
-	  char-set:uri-opaque-auth
 	  char-set:uri-query
 	  char-set:uri-reg-name
 	  char-set:uri-scheme
@@ -5535,7 +5534,6 @@ USA.
 	  uri-rexp:ipv6-address
 	  uri-rexp:ipvfuture
 	  uri-rexp:ls32
-	  uri-rexp:opaque-auth
 	  uri-rexp:path
 	  uri-rexp:path-abempty
 	  uri-rexp:path-absolute
diff --git a/src/runtime/url.scm b/src/runtime/url.scm
index b4682f268..667907a0d 100644
--- a/src/runtime/url.scm
+++ b/src/runtime/url.scm
@@ -178,12 +178,10 @@ USA.
   (%uri-authority-port authority))
 
 (define (uri-userinfo? object)
-  (and (string? object)
-       (*match-string parser:userinfo object)))
+  (utf8-string? object))
 
 (define (uri-host? object)
-  (and (string? object)
-       (*match-string matcher:host object)))
+  (utf8-string? object))
 
 (define (uri-port? object)
   (exact-nonnegative-integer? object))
@@ -457,7 +455,12 @@ USA.
 
 (define parser:hostport
   (*parser
-   (seq (map uri-string-downcase (match matcher:host))
+   (seq (map uri-string-downcase
+	     (alt (match matcher:ip-literal)
+		  ;; subsumed by MATCHER:REG-NAME
+		  ;;matcher:ipv4-address
+		  (map decode-component
+		       (match matcher:reg-name))))
 	(alt (seq ":"
 		  (map string->number
 		       (match (+ (char-set char-set:uri-digit)))))
@@ -477,13 +480,6 @@ USA.
 		 (write-char (char-downcase char) output)
 		 (loop)))))))))
 
-(define matcher:host
-  (*matcher
-   (alt matcher:ip-literal
-	;; subsumed by MATCHER:REG-NAME
-	;;matcher:ipv4-address
-	matcher:reg-name)))
-
 (define matcher:ip-literal
   (*matcher
    (seq "["
@@ -627,7 +623,9 @@ USA.
 	(write-encoded userinfo char-set:uri-userinfo output)
 	(write-char #\@ output)))
   (if host
-      (write-encoded host char-set:uri-opaque-auth output))
+      (if (*match-string matcher:ip-literal host)
+	  (write-string host output)
+	  (write-encoded host char-set:uri-reg-name output)))
   (if port
       (begin
 	(write-char #\: output)
@@ -754,9 +752,6 @@ USA.
   (rexp-sequence char-set:uri-alpha
 		 (rexp* char-set:uri-scheme)))
 
-(define (uri-rexp:opaque-auth)
-  (rexp* char-set:uri-opaque-auth))
-
 (define (uri-rexp:authority)
   (rexp-sequence (rexp-optional (uri-rexp:userinfo) "@")
 		 (uri-rexp:host)
@@ -887,13 +882,13 @@ USA.
 (define char-set:uri-hex)
 (define char-set:uri-scheme)
 (define char-set:uri-userinfo)
-(define char-set:uri-opaque-auth)
 (define char-set:uri-ipvfuture)
 (define char-set:uri-reg-name)
 (define char-set:uri-segment)
 (define char-set:uri-segment-nc)
 (define char-set:uri-query)
 (define char-set:uri-fragment)
+(define char-set:uri-sloppy-auth)
 
 (define parser:userinfo)
 (define matcher:reg-name)
@@ -916,24 +911,23 @@ USA.
 	(char-set-union char-set:uri-alpha
 			char-set:uri-digit
 			(string->char-set "+-.")))
-  (let* ((uri-char
+  (let* ((gen-delims (string->char-set ":/?#[]@"))
+	 (sub-delims (string->char-set "!$&'()*+,;="))
+	 (unreserved
 	  (char-set-union char-set:uri-alpha
 			  char-set:uri-digit
-			  (string->char-set "!$&'()*+,-./:;=?@_~")))
+			  (string->char-set "-._~")))
 	 (component-chars
-	  (lambda (free)
-	    (char-set-difference uri-char (string->char-set free)))))
-    (set! char-set:uri-userinfo		(component-chars "/?@"))
+	  (lambda (extra)
+	    (char-set-union unreserved sub-delims (string->char-set extra)))))
+    (set! char-set:uri-userinfo		(component-chars ":"))
     (set! char-set:uri-ipvfuture	char-set:uri-userinfo)
-    (set! char-set:uri-reg-name		(component-chars "/:?@"))
-    (set! char-set:uri-segment		(component-chars "/?"))
-    (set! char-set:uri-segment-nc	(component-chars "/:?"))
-    (set! char-set:uri-query		uri-char)
-    (set! char-set:uri-fragment		uri-char)
-
-    (set! char-set:uri-opaque-auth
-	  (char-set-union (component-chars "/?")
-			  (string->char-set "[]"))))
+    (set! char-set:uri-reg-name		(component-chars ""))
+    (set! char-set:uri-segment		(component-chars ":@"))
+    (set! char-set:uri-segment-nc	(component-chars "@"))
+    (set! char-set:uri-query		(component-chars ":@/?"))
+    (set! char-set:uri-fragment		char-set:uri-query)
+    (set! char-set:uri-sloppy-auth	(component-chars ":@[]")))
 
   (set! parser:userinfo		(component-parser-* char-set:uri-userinfo))
   (set! matcher:reg-name	(component-matcher-* char-set:uri-reg-name))
@@ -1249,7 +1243,7 @@ USA.
   (EOF))
 
 (define-ppu-state authority
-  (opaque-auth (push) authority)
+  (sloppy-auth (push) authority)
   (/ (set authority) (push) path)
   (? (set authority) query)
   (|#| (set authority) fragment)