Complete rewrite of URL support. New design implements generic codec
authorChris Hanson <org/chris-hanson/cph>
Tue, 24 May 2005 04:50:50 +0000 (04:50 +0000)
committerChris Hanson <org/chris-hanson/cph>
Tue, 24 May 2005 04:50:50 +0000 (04:50 +0000)
support for URIs as defined in RFC 2396, which is both more general
and easier to use than the old design.

All names have been changed to use the string "uri" rather than "url".
A minimal number of URL procedures has been retained to support IMAIL
until it is rewritten to use the new design.

The package has been renamed to '(runtime uri).

v7/src/runtime/ed-ffi.scm
v7/src/runtime/make.scm
v7/src/runtime/runtime.pkg
v7/src/runtime/symbol.scm
v7/src/runtime/unicode.scm
v7/src/runtime/url.scm

index 36128c98b141f17be4f33617e1c44f63872c9fb1..1ed3d43b2343b30d99ae33f8cf540a9b9a9ad75a 100644 (file)
@@ -1,9 +1,9 @@
 #| -*- Scheme -*-
 
-$Id: ed-ffi.scm,v 1.36 2003/06/08 05:07:00 cph Exp $
+$Id: ed-ffi.scm,v 1.37 2005/05/24 04:50:08 cph Exp $
 
 Copyright (c) 1991,1996,1997,1999,2000 Massachusetts Institute of Technology
-Copyright (c) 2001,2002,2003 Massachusetts Institute of Technology
+Copyright (c) 2001,2002,2003,2005 Massachusetts Institute of Technology
 
 This file is part of MIT/GNU Scheme.
 
@@ -167,7 +167,7 @@ USA.
     ("unxprm"  (runtime os-primitives))
     ("unxpth"  (runtime pathname unix))
     ("uproc"   (runtime procedure))
-    ("url"     (runtime url))
+    ("url"     (runtime uri))
     ("urtrap"  (runtime reference-trap))
     ("usrint"  (runtime user-interface))
     ("utabs"   (runtime microcode-tables))
index c3ba4b37410e519587a7922c6c0cdab7dc823f0e..a1673e8a0d48d7b0d2842729e1a974a15bfaf4fe 100644 (file)
@@ -1,10 +1,10 @@
 #| -*-Scheme-*-
 
-$Id: make.scm,v 14.97 2004/12/13 03:22:21 cph Exp $
+$Id: make.scm,v 14.98 2005/05/24 04:50:17 cph Exp $
 
 Copyright 1988,1989,1990,1991,1992,1993 Massachusetts Institute of Technology
 Copyright 1994,1995,1996,1997,1998,2000 Massachusetts Institute of Technology
-Copyright 2001,2002,2003,2004 Massachusetts Institute of Technology
+Copyright 2001,2002,2003,2004,2005 Massachusetts Institute of Technology
 
 This file is part of MIT/GNU Scheme.
 
@@ -522,7 +522,7 @@ USA.
    (RUNTIME EMACS-INTERFACE)
    ;; More debugging
    ((RUNTIME CONTINUATION-PARSER) INITIALIZE-SPECIAL-FRAMES! #f)
-   (RUNTIME URL)))
+   (RUNTIME URI)))
 
 (if (eq? os-name 'NT)
     (package-initialize '(RUNTIME WIN32-REGISTRY) 'INITIALIZE-PACKAGE! #f))
index e4cf6701ca30ab31a01ad1c879cd24c541820809..a117021350d2a8432f5b0c4687945f5f4c12e11d 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: runtime.pkg,v 14.543 2005/05/20 04:08:10 cph Exp $
+$Id: runtime.pkg,v 14.544 2005/05/24 04:50:28 cph Exp $
 
 Copyright 1988,1989,1990,1991,1992,1993 Massachusetts Institute of Technology
 Copyright 1994,1995,1996,1997,1998,1999 Massachusetts Institute of Technology
@@ -4633,9 +4633,29 @@ USA.
          char-in-alphabet?
          char-set->alphabet
          code-points->alphabet
+         error:not-8-bit-alphabet
+         error:not-alphabet
+         error:not-unicode-code-point
+         error:not-utf16-be-string
+         error:not-utf16-le-string
+         error:not-utf16-string
+         error:not-utf32-be-string
+         error:not-utf32-le-string
+         error:not-utf32-string
+         error:not-utf8-string
+         error:not-well-formed-code-point-list
+         error:not-wide-char
+         error:not-wide-string
          guarantee-8-bit-alphabet
          guarantee-alphabet
          guarantee-unicode-code-point
+         guarantee-utf16-be-string
+         guarantee-utf16-le-string
+         guarantee-utf16-string
+         guarantee-utf32-be-string
+         guarantee-utf32-le-string
+         guarantee-utf32-string
+         guarantee-utf8-string
          guarantee-well-formed-code-point-list
          guarantee-wide-char
          guarantee-wide-string
@@ -4666,29 +4686,37 @@ USA.
          read-utf32-le-char
          read-utf8-char
          string->alphabet
+         string->utf8-string
          string->wide-string
          unicode-code-point?
          utf16-be-string->wide-string
          utf16-be-string-length
          utf16-be-string-valid?
+         utf16-be-string?
          utf16-le-string->wide-string
          utf16-le-string-length
          utf16-le-string-valid?
+         utf16-le-string?
          utf16-string->wide-string
          utf16-string-length
          utf16-string-valid?
+         utf16-string?
          utf32-be-string->wide-string
          utf32-be-string-length
          utf32-be-string-valid?
+         utf32-be-string?
          utf32-le-string->wide-string
          utf32-le-string-length
          utf32-le-string-valid?
+         utf32-le-string?
          utf32-string->wide-string
          utf32-string-length
          utf32-string-valid?
+         utf32-string?
          utf8-string->wide-string
          utf8-string-length
          utf8-string-valid?
+         utf8-string?
          well-formed-code-point-list?
          wide-char?
          wide-string
@@ -4723,37 +4751,83 @@ USA.
   (export (runtime input-port)
          wide-string-contents))
 
-(define-package (runtime url)
+(define-package (runtime uri)
   (files "url")
   (parent (runtime))
   (export ()
-         url:char-set:alpha
-         url:char-set:alphadigit
-         url:char-set:digit
-         url:char-set:escaped
-         url:char-set:extra
-         url:char-set:lowalpha
-         url:char-set:national
-         url:char-set:punctuation
-         url:char-set:reserved
-         url:char-set:safe
-         url:char-set:scheme
-         url:char-set:unescaped
+         (url:decode-string decode-component)
+         (url:match:escape match-escape)
+         (url:parse:hostport parse-hostport)
+         ->uri
+         absolute-uri?
+         char-set:uri-alpha
+         char-set:uri-alphanum
+         char-set:uri-digit
+         char-set:uri-hex
+         char-set:uri-pchar
+         char-set:uri-reg-name
+         char-set:uri-rel-segment
+         char-set:uri-scheme
+         char-set:uri-userinfo
+         char-set:uric
+         char-set:uric-no-slash
+         error:not-absolute-uri
+         error:not-heirarchical-uri
+         error:not-opaque-uri
+         error:not-relative-uri
+         error:not-uri
+         error:not-uri-authority
+         error:not-uri-host
+         error:not-uri-path
+         error:not-uri-port
+         error:not-uri-registry-name
+         error:not-uri-scheme
+         error:not-uri-server
+         guarantee-absolute-uri
+         guarantee-heirarchical-uri
+         guarantee-opaque-uri
+         guarantee-relative-uri
+         guarantee-uri
+         guarantee-uri-authority
+         guarantee-uri-host
+         guarantee-uri-path
+         guarantee-uri-port
+         guarantee-uri-registry-name
+         guarantee-uri-scheme
+         guarantee-uri-server
+         heirarchical-uri?
+         make-uri
+         make-uri-server
+         opaque-uri?
+         parse-uri
+         relative-uri?
+         string->uri
+         uri->string
+         uri-absolute?
+         uri-authority
+         uri-authority?
+         uri-fragment
+         uri-heirarchical?
+         uri-host?
+         uri-opaque?
+         uri-path
+         uri-path-absolute?
+         uri-path-relative?
+         uri-path?
+         uri-port?
+         uri-query
+         uri-registry-name?
+         uri-relative?
+         uri-scheme
+         uri-scheme?
+         uri-server-host
+         uri-server-port
+         uri-server-userinfo
+         uri-server?
+         uri?
          url:char-set:unreserved
-         url:decode-string
-         url:decode-substring
          url:encode-string
-         url:encode-substring
-         url:match:escape
-         url:match:host
-         url:match:hostname
-         url:match:hostnumber
-         url:match:uchar
-         url:match:xchar
-         url:parse:hostport
-         url:parse:scheme
-         url:string-encoded?
-         url:substring-encoded?))
+         write-uri))
 
 (define-package (runtime postgresql)
   (file-case options
index cba4d08903bf10f95df4e68513348cedec816d28..10f78e9ad413974f36f99934a769733fa50c8ab6 100644 (file)
@@ -1,8 +1,8 @@
 #| -*-Scheme-*-
 
-$Id: symbol.scm,v 1.17 2004/12/23 04:43:48 cph Exp $
+$Id: symbol.scm,v 1.18 2005/05/24 04:50:35 cph Exp $
 
-Copyright 1992,1993,2001,2003,2004 Massachusetts Institute of Technology
+Copyright 1992,1993,2001,2003,2004,2005 Massachusetts Institute of Technology
 
 This file is part of MIT/GNU Scheme.
 
@@ -57,9 +57,9 @@ USA.
                                      (make-unmapped-unbound-reference-trap)))
 
 (define (utf8-string->uninterned-symbol string)
-  (guarantee-string string 'UTF8-STRING->UNINTERNED-SYMBOL)
+  (guarantee-utf8-string string 'UTF8-STRING->UNINTERNED-SYMBOL)
   ((ucode-primitive system-pair-cons) (ucode-type uninterned-symbol)
-                                     string
+                                     (string-copy string)
                                      (make-unmapped-unbound-reference-trap)))
 
 (define (string->symbol string)
@@ -71,7 +71,7 @@ USA.
        ((ucode-primitive string->symbol) string*))))
 
 (define (utf8-string->symbol string)
-  (guarantee-string string 'UTF8-STRING->SYMBOL)
+  (guarantee-utf8-string string 'UTF8-STRING->SYMBOL)
   (or ((ucode-primitive find-symbol) string)
       ((ucode-primitive string->symbol) (string-copy string))))
 
@@ -80,7 +80,7 @@ USA.
 
 (define (substring->symbol string start end)
   (guarantee-substring string start end 'SUBSTRING->SYMBOL)
-  ((ucode-primitive string->symbol) (substring->utf8-string string start end)))
+  ((ucode-primitive string->symbol) (string->utf8-string string start end)))
 
 (define (string-head->symbol string end)
   (substring->symbol string 0 end))
@@ -101,50 +101,6 @@ USA.
        ((not object) "")
        (else (error:wrong-type-argument object "symbol component" 'SYMBOL))))
 \f
-(define (string->utf8-string string)
-  (let ((end (string-length string)))
-    (let ((n (count-non-ascii string 0 end)))
-      (if (fix:> n 0)
-         (let ((string* (make-string (fix:+ end n))))
-           (%substring->utf8-string string 0 end string*)
-           string*)
-         string))))
-
-(define (substring->utf8-string string start end)
-  (let ((string*
-        (make-string
-         (fix:+ (fix:- end start)
-                (count-non-ascii string start end)))))
-    (%substring->utf8-string string start end string*)
-    string*))
-
-(define (count-non-ascii string start end)
-  (let loop ((i start) (n 0))
-    (if (fix:< i end)
-       (loop (fix:+ i 1)
-             (if (fix:< (vector-8b-ref string i) #x80)
-                 n
-                 (fix:+ n 1)))
-       n)))
-
-(define (%substring->utf8-string string start end string*)
-  (let loop ((i start) (i* 0))
-    (if (fix:< i end)
-       (if (fix:< (vector-8b-ref string i) #x80)
-           (begin
-             (vector-8b-set! string* i* (vector-8b-ref string i))
-             (loop (fix:+ i 1) (fix:+ i* 1)))
-           (begin
-             (vector-8b-set!
-              string*
-              i*
-              (fix:or #xC0 (fix:lsh (vector-8b-ref string i) -6)))
-             (vector-8b-set!
-              string*
-              (fix:+ i* 1)
-              (fix:or #x80 (fix:and (vector-8b-ref string i) #x3F)))
-             (loop (fix:+ i 1) (fix:+ i* 2)))))))
-\f
 (define (intern string)
   (if (string-lower-case? string)
       (string->symbol string)
index d89f7ce6757c48d15649686150d87d1ab30d0754..80981ccb199a50f65aea83f56fbfd29c8d0eaa30 100644 (file)
@@ -1,8 +1,8 @@
 #| -*-Scheme-*-
 
-$Id: unicode.scm,v 1.21 2004/12/06 21:27:35 cph Exp $
+$Id: unicode.scm,v 1.22 2005/05/24 04:50:43 cph Exp $
 
-Copyright 2001,2003,2004 Massachusetts Institute of Technology
+Copyright 2001,2003,2004,2005 Massachusetts Institute of Technology
 
 This file is part of MIT/GNU Scheme.
 
@@ -148,23 +148,13 @@ USA.
        (fix:= (char-bits object) 0)
        (unicode-code-point? (char-code object))))
 
-(define (guarantee-wide-char object caller)
-  (if (not (wide-char? object))
-      (error:not-wide-char object caller)))
-
-(define (error:not-wide-char object caller)
-  (error:wrong-type-argument object "a Unicode character" caller))
+(define-guarantee wide-char "a Unicode character")
 
 (define (unicode-code-point? object)
   (and (index-fixnum? object)
        (legal-code-32? object)))
 
-(define-integrable (guarantee-unicode-code-point object caller)
-  (if (not (unicode-code-point? object))
-      (error:not-unicode-code-point object caller)))
-
-(define (error:not-unicode-code-point object caller)
-  (error:wrong-type-argument object "a Unicode code point" caller))
+(define-guarantee unicode-code-point "a Unicode code point")
 
 (define-integrable (legal-code-32? pt)
   (if (fix:< pt #x10000)
@@ -185,12 +175,7 @@ USA.
   (high1 #f read-only #t)
   (high2 #f read-only #t))
 
-(define-integrable (guarantee-alphabet object caller)
-  (if (not (alphabet? object))
-      (error:not-alphabet object caller)))
-
-(define (error:not-alphabet object caller)
-  (error:wrong-type-argument object "a Unicode alphabet" caller))
+(define-guarantee alphabet "a Unicode alphabet")
 
 (define-integrable (make-alphabet-low)
   (make-string #x100 (integer->char 0)))
@@ -248,12 +233,7 @@ USA.
           (fix:< (car item) (cdr item)))
       (unicode-code-point? item)))
 
-(define-integrable (guarantee-well-formed-code-point-list object caller)
-  (if (not (well-formed-code-point-list? object))
-      (error:not-well-formed-code-point-list object caller)))
-
-(define (error:not-well-formed-code-point-list object caller)
-  (error:wrong-type-argument object "a Unicode code-point list" caller))
+(define-guarantee well-formed-code-point-list "a Unicode code-point list")
 
 (define (code-points->alphabet items)
   (guarantee-well-formed-code-point-list items 'CODE-POINTS->ALPHABET)
@@ -380,12 +360,7 @@ USA.
               (and (fix:= (vector-8b-ref low i) 0)
                    (loop (fix:+ i 1))))))))
 
-(define-integrable (guarantee-8-bit-alphabet object caller)
-  (if (not (8-bit-alphabet? object))
-      (error:not-8-bit-alphabet object caller)))
-
-(define (error:not-8-bit-alphabet object caller)
-  (error:wrong-type-argument object "an 8-bit alphabet" caller))
+(define-guarantee 8-bit-alphabet "an 8-bit alphabet")
 
 (define (char-set->alphabet char-set)
   (guarantee-char-set char-set 'CHAR-SET->ALPHABET)
@@ -545,6 +520,8 @@ USA.
                               (constructor %make-wide-string))
   (contents #f read-only #t))
 
+(define-guarantee wide-string "a Unicode string")
+
 (define (make-wide-string length #!optional char)
   (%make-wide-string
    (make-vector length
@@ -596,13 +573,6 @@ USA.
        (vector-set! v2 j (vector-ref v1 i))))
     string*))
 \f
-(define-integrable (guarantee-wide-string object caller)
-  (if (not (wide-string? object))
-      (error:not-wide-string object caller)))
-
-(define (error:not-wide-string object caller)
-  (error:wrong-type-argument object "a Unicode string" caller))
-
 (define (wide-string-index? index string)
   (and (index-fixnum? index)
        (fix:< index (%wide-string-length string))))
@@ -821,6 +791,22 @@ USA.
           (legal-code-32? (combiner (n 0) (n 1) (n 2) (n 3)))
           (fix:+ start 4))
       start))
+
+(define (utf32-string? object)
+  (and (string? object)
+       (utf32-string-valid? object)))
+
+(define (utf32-be-string? object)
+  (and (string? object)
+       (utf32-be-string-valid? object)))
+
+(define (utf32-le-string? object)
+  (and (string? object)
+       (utf32-le-string-valid? object)))
+
+(define-guarantee utf32-string "UTF-32 string")
+(define-guarantee utf32-be-string "UTF-32BE string")
+(define-guarantee utf32-le-string "UTF-32LE string")
 \f
 ;;;; UTF-16 representation
 
@@ -929,7 +915,7 @@ USA.
 (define (wide-string->utf16-le-string string #!optional start end)
   (wide-string->utf-string string start end sink-utf16-le-char
                           'WIDE-STRING->UTF16-LE-STRING))
-\f
+
 (define (utf16-string-length string #!optional start end)
   (if (host-big-endian?)
       (%utf16-string-length string start end "16BE" be-bytes->digit16
@@ -950,7 +936,7 @@ USA.
     (encoded-string-length string start end type caller
       (lambda (string start end)
        (validate-utf16-char string start end combiner)))))
-
+\f
 (define (utf16-string-valid? string #!optional start end)
   (if (host-big-endian?)
       (%utf16-string-valid? string start end be-bytes->digit16
@@ -1004,6 +990,22 @@ USA.
   (fix:+ (fix:+ (fix:lsh (fix:and n0 #x3FF) 10)
                (fix:and n1 #x3FF))
         #x10000))
+
+(define (utf16-string? object)
+  (and (string? object)
+       (utf16-string-valid? object)))
+
+(define (utf16-be-string? object)
+  (and (string? object)
+       (utf16-be-string-valid? object)))
+
+(define (utf16-le-string? object)
+  (and (string? object)
+       (utf16-le-string-valid? object)))
+
+(define-guarantee utf16-string "UTF-16 string")
+(define-guarantee utf16-be-string "UTF-16BE string")
+(define-guarantee utf16-le-string "UTF-16LE string")
 \f
 ;;;; UTF-8 representation
 
@@ -1090,6 +1092,42 @@ USA.
 (define (utf8-string-valid? string #!optional start end)
   (with-substring-args string start end 'UTF8-STRING-VALID?
     (encoded-string-valid? string start end validate-utf8-char)))
+
+(define (utf8-string? object)
+  (and (string? object)
+       (utf8-string-valid? object)))
+
+(define-guarantee utf8-string "UTF-8 string")
+
+(define (string->utf8-string string #!optional start end)
+  (with-substring-args string start end 'STRING->UTF8-STRING
+    (let ((string*
+          (make-string
+           (fix:+ (fix:- end start)
+                  (let loop ((i start) (n 0))
+                    (if (fix:< i end)
+                        (loop (fix:+ i 1)
+                              (if (fix:< (vector-8b-ref string i) #x80)
+                                  n
+                                  (fix:+ n 1)))
+                        n))))))
+      (let loop ((i start) (i* 0))
+       (if (fix:< i end)
+           (if (fix:< (vector-8b-ref string i) #x80)
+               (begin
+                 (vector-8b-set! string* i* (vector-8b-ref string i))
+                 (loop (fix:+ i 1) (fix:+ i* 1)))
+               (begin
+                 (vector-8b-set!
+                  string*
+                  i*
+                  (fix:or #xC0 (fix:lsh (vector-8b-ref string i) -6)))
+                 (vector-8b-set!
+                  string*
+                  (fix:+ i* 1)
+                  (fix:or #x80 (fix:and (vector-8b-ref string i) #x3F)))
+                 (loop (fix:+ i 1) (fix:+ i* 2))))))
+      string*)))
 \f
 (define (validate-utf8-char string start end)
 
index 64785d179961ae872b781053c068e2cf30f43614..43e6cf91a6072a2c6c7896736317c644e059a181 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: url.scm,v 1.17 2005/05/20 04:07:54 cph Exp $
+$Id: url.scm,v 1.18 2005/05/24 04:50:50 cph Exp $
 
 Copyright 2000,2001,2003,2004,2005 Massachusetts Institute of Technology
 
@@ -23,186 +23,497 @@ USA.
 
 |#
 
-;;;; URL Encoding
+;;;; URI Encoding
+;;; package: (runtime uri)
+
+;;; Based on RFC 2396 <http://ietf.org/rfc/rfc2396.txt>
 
 (declare (usual-integrations))
 \f
-(define url:char-set:lowalpha)
-(define url:char-set:alpha)
-(define url:char-set:digit)
-(define url:char-set:alphadigit)
-(define url:char-set:scheme)
-(define url:char-set:safe)
-(define url:char-set:extra)
-(define url:char-set:national)
-(define url:char-set:punctuation)
-(define url:char-set:reserved)
-(define url:char-set:hex)
+(define-record-type <uri>
+    (%make-uri scheme authority path-relative? path query fragment)
+    uri?
+  (scheme uri-scheme)
+  (authority uri-authority)
+  (path-relative? uri-path-relative?)
+  (path uri-path)
+  (query uri-query)
+  (fragment uri-fragment))
+
+(define (make-uri scheme authority path-relative? path query fragment)
+  (if scheme (guarantee-uri-scheme scheme 'MAKE-URI))
+  (if authority (guarantee-uri-authority authority 'MAKE-URI))
+  (guarantee-uri-path path 'MAKE-URI)
+  (if query (guarantee-utf8-string query 'MAKE-URI))
+  (if fragment (guarantee-utf8-string fragment 'MAKE-URI))
+  (if (string? path)
+      (begin
+       (if (not scheme) (error:bad-range-argument scheme 'MAKE-URI))
+       (if authority (error:bad-range-argument authority 'MAKE-URI))
+       (if path-relative? (error:bad-range-argument path-relative? 'MAKE-URI))
+       (if query (error:bad-range-argument query 'MAKE-URI))))
+  (if (and scheme path-relative?)
+      (error:bad-range-argument path-relative? 'MAKE-URI))
+  (if (and (null? path) (not authority))
+      (error:bad-range-argument path 'MAKE-URI))
+  (%make-uri scheme authority (if path-relative? #t #f) path query fragment))
+
+(define-integrable (uri-path-absolute? uri)
+  (not (uri-path-relative? uri)))
+
+(define-integrable (uri-relative? uri)
+  (if (uri-scheme uri) #f #t))
+
+(define-integrable (uri-absolute? uri)
+  (if (uri-scheme uri) #t #f))
+
+(define-integrable (uri-opaque? uri)
+  (string? (uri-path uri)))
+
+(define-integrable (uri-heirarchical? uri)
+  (not (uri-opaque? uri)))
+
+(define (relative-uri? object)
+  (and (uri? object)
+       (uri-relative? object)))
+
+(define (absolute-uri? object)
+  (and (uri? object)
+       (uri-absolute? object)))
+
+(define (opaque-uri? object)
+  (and (uri? object)
+       (uri-opaque? object)))
+
+(define (heirarchical-uri? object)
+  (and (uri? object)
+       (uri-heirarchical? object)))
+
+(define-guarantee uri "URI")
+(define-guarantee relative-uri "relative URI")
+(define-guarantee absolute-uri "absolute URI")
+(define-guarantee opaque-uri "opaque URI")
+(define-guarantee heirarchical-uri "heirarchical URI")
+\f
+(define (uri-scheme? object)
+  (and (interned-symbol? object)
+       (complete-match match-scheme (symbol-name object))))
+
+(define (uri-path? object)
+  (or (and (utf8-string? object)
+          (fix:> (string-length object) 0))
+      (list-of-type? object
+       (lambda (elt)
+         (or (utf8-string? elt)
+             (and (pair? elt)
+                  (utf8-string? (car elt))
+                  (list-of-type? (cdr elt) utf8-string?)))))))
+
+(define (uri-authority? object)
+  (or (uri-server? object)
+      (uri-registry-name? object)))
+
+(define (uri-registry-name? object)
+  (and (utf8-string? object)
+       (fix:> (string-length object) 0)))
+
+(define-record-type <uri-server>
+    (%make-uri-server host port userinfo)
+    uri-server?
+  (host uri-server-host)
+  (port uri-server-port)
+  (userinfo uri-server-userinfo))
+
+(define (make-uri-server host port userinfo)
+  (if host (guarantee-uri-host host 'MAKE-URI-SERVER))
+  (if port (guarantee-uri-port port 'MAKE-URI-SERVER))
+  (if userinfo (guarantee-utf8-string userinfo 'MAKE-URI-SERVER))
+  (if (not host)
+      (begin
+       (if port (error:bad-range-argument port 'MAKE-URI-SERVER))
+       (if userinfo (error:bad-range-argument userinfo 'MAKE-URI-SERVER))))
+  (%make-uri-server host port userinfo))
+
+(define (uri-host? object)
+  (and (string? object)
+       (complete-match match-host object)))
+
+(define (uri-port? object)
+  (exact-nonnegative-integer? object))
+
+(define-guarantee uri-scheme "URI scheme")
+(define-guarantee uri-path "URI path")
+(define-guarantee uri-authority "URI authority")
+(define-guarantee uri-registry-name "URI registry name")
+(define-guarantee uri-server "URI server")
+(define-guarantee uri-host "URI host")
+(define-guarantee uri-port "URI port")
+\f
+(define char-set:uri-alpha)
+(define char-set:uri-digit)
+(define char-set:uri-alphanum)
+(define char-set:uri-alphanum-)
+(define char-set:uri-hex)
+(define char-set:uri-scheme)
+(define char-set:uric)
+(define char-set:uric-no-slash)
+(define char-set:uri-reg-name)
+(define char-set:uri-userinfo)
+(define char-set:uri-rel-segment)
+(define char-set:uri-pchar)
+
+(define parse-fragment)
+(define parse-query)
+(define parse-reg-name)
+(define parse-userinfo)
+(define parse-rel-segment)
+(define parse-pchar)
+
 (define url:char-set:unreserved)
 (define url:char-set:unescaped)
-(define url:char-set:escaped)
 
 (define (initialize-package!)
-  (set! url:char-set:lowalpha (string->char-set "abcdefghijklmnopqrstuvwxyz"))
-  (set! url:char-set:alpha
-       (char-set-union url:char-set:lowalpha
-                       (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")))
-  (set! url:char-set:digit (string->char-set "0123456789"))
-  (set! url:char-set:alphadigit
-       (char-set-union url:char-set:alpha url:char-set:digit))
-  (set! url:char-set:scheme
-       (char-set-union url:char-set:alphadigit (string->char-set "+-.")))
-  (set! url:char-set:safe (string->char-set "$-_.+"))
-  (set! url:char-set:extra (string->char-set "!*'(),"))
-  (set! url:char-set:national (string->char-set "{}|\\^~[]`"))
-  (set! url:char-set:punctuation (string->char-set "<>#%\""))
-  (set! url:char-set:reserved (string->char-set ";/?:@&="))
-  (set! url:char-set:hex (string->char-set "0123456789abcdefABCDEF"))
+  (set! char-set:uri-alpha
+       (string->char-set
+        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
+  (set! char-set:uri-digit (string->char-set "0123456789"))
+  (set! char-set:uri-alphanum
+       (char-set-union char-set:uri-alpha char-set:uri-digit))
+  (set! char-set:uri-alphanum-
+       (char-set-union char-set:uri-alphanum (char-set #\-)))
+  (set! char-set:uri-hex (string->char-set "0123456789abcdefABCDEF"))
+  (set! char-set:uri-scheme
+       (char-set-union char-set:uri-alphanum (string->char-set "+-.")))
+  (set! char-set:uric
+       (char-set-union char-set:uri-alphanum
+                       (string->char-set "!'()*-._~")  ;mark
+                       (string->char-set "$&+,/:;=?@") ;reserved
+                       ))
+  (let ((component-chars
+        (lambda (free)
+          (char-set-difference char-set:uric (string->char-set free)))))
+    (set! char-set:uric-no-slash       (component-chars "/"))
+    (set! char-set:uri-reg-name                (component-chars "/?"))
+    (set! char-set:uri-userinfo                (component-chars "/?@"))
+    (set! char-set:uri-rel-segment     (component-chars "/:?"))
+    (set! char-set:uri-pchar           (component-chars "/;?")))
+
+  (set! parse-fragment (component-parser-* char-set:uric))
+  (set! parse-query parse-fragment)
+  (set! parse-reg-name (component-parser-+ char-set:uri-reg-name))
+  (set! parse-userinfo (component-parser-* char-set:uri-userinfo))
+  (set! parse-rel-segment (component-parser-+ char-set:uri-rel-segment))
+  (set! parse-pchar (component-parser-* char-set:uri-pchar))
+
+  ;; backwards compatibility:
   (set! url:char-set:unreserved
-       (char-set-union url:char-set:alphadigit
-                       url:char-set:safe
-                       url:char-set:extra))
+       (char-set-union char-set:uri-alphanum
+                       (string->char-set "!$'()*+,-._")))
   (set! url:char-set:unescaped
        (char-set-union url:char-set:unreserved
-                       url:char-set:reserved))
-  (set! url:char-set:escaped
-       (char-set-invert url:char-set:unescaped))
+                       (string->char-set ";/?:@&=")))
   unspecific)
+\f
+;;;; Parser
 
-(define url:match:uchar
-  (*matcher
-   (alt (char-set url:char-set:unreserved)
-       url:match:escape)))
+(define (string->uri string #!optional start end)
+  (let ((v (complete-parse parse-uri string start end)))
+    (and v
+        (vector-ref v 0))))
 
-(define url:match:xchar
-  (*matcher
-   (alt (char-set url:char-set:unescaped)
-       url:match:escape)))
+(define (->uri object)
+  (cond ((uri? object) object)
+       ((string? object) (string->uri object))
+       ((symbol? object) (string->uri (symbol-name object)))
+       (else (error:not-uri object '->URI))))
+
+(define parse-uri
+  (*parser
+   (top-level
+    (seq (alt parse-absolute-uri
+             parse-relative-uri
+             (values #f))
+        (alt (seq "#" parse-fragment)
+             (values #f))))))
+
+(define parse-absolute-uri
+  (*parser
+   (alt (encapsulate (lambda (v)
+                      (let ((path (vector-ref v 1)))
+                        (%make-uri (vector-ref v 0)
+                                   (vector-ref path 0)
+                                   (vector-ref path 1)
+                                   (vector-ref path 2)
+                                   (vector-ref v 2)
+                                   (vector-ref v 3))))
+         (seq parse-scheme
+              ":"
+              (alt parse-net-path parse-abs-path)
+              (alt (seq "?" parse-query)
+                   (values #f))
+              (alt (seq "#" parse-fragment)
+                   (values #f))))
+       (encapsulate (lambda (v)
+                      (%make-uri (vector-ref v 0)
+                                 #f
+                                 #f
+                                 (vector-ref v 1)
+                                 #f
+                                 (vector-ref v 2)))
+         (seq parse-scheme
+              ":"
+              (match (seq (char-set char-set:uric-no-slash)
+                          (* (char-set char-set:uric))))
+              (alt (seq "#" parse-fragment)
+                   (values #f)))))))
 
-(define url:match:escape
+(define parse-scheme
+  (*parser
+   (map intern (match match-scheme))))
+
+(define match-scheme
   (*matcher
-   (seq "%"
-       (char-set url:char-set:hex)
-       (char-set url:char-set:hex))))
+   (seq (char-set char-set:uri-alpha)
+       (* (char-set char-set:uri-scheme)))))
+
+(define parse-relative-uri
+  (*parser
+   (encapsulate (lambda (v)
+                 (let ((path (vector-ref v 0)))
+                   (%make-uri #f
+                              (vector-ref path 0)
+                              (vector-ref path 1)
+                              (vector-ref path 2)
+                              (vector-ref v 1)
+                              (vector-ref v 2))))
+     (seq (alt parse-net-path
+              parse-abs-path
+              parse-rel-path)
+         (alt (seq "?" parse-query)
+              (values #f))
+         (alt (seq "#" parse-fragment)
+              (values #f))))))
 \f
-(define url:parse:scheme
+(define parse-net-path
+  (*parser
+   (encapsulate (lambda (v) (vector (vector-ref v 0) #f (vector-ref v 1)))
+     (seq "//"
+         parse-authority
+         (alt (encapsulate vector->list
+                (* (seq "/" parse-segment)))
+              (values '()))))))
+
+(define parse-abs-path
+  (*parser
+   (map (lambda (p) (vector #f #f p))
+       (encapsulate vector->list
+         (* (seq "/" parse-segment))))))
+
+(define parse-rel-path
   (*parser
-   (map intern
-       (match (+ url:char-set:scheme)))))
+   (map (lambda (p) (vector #f #t p))
+       (encapsulate vector->list
+         (seq parse-rel-segment
+              (* (seq "/" parse-segment)))))))
 
-(define url:parse:hostport
+(define parse-segment
   (*parser
-   (seq (match url:match:host)
-       (alt (map string->number
-                 (seq (noise ":")
-                      (match (+ (char-set url:char-set:digit)))))
+   (encapsulate (lambda (v)
+                 (if (fix:> (vector-length v) 1)
+                     (vector->list v)
+                     (vector-ref v 0)))
+     (seq parse-pchar
+         (* (seq ";" parse-pchar))))))
+
+(define parse-authority
+  (*parser
+   (alt (encapsulate (lambda (v)
+                      (%make-uri-server (vector-ref v 1)
+                                        (vector-ref v 2)
+                                        (vector-ref v 0)))
+         (seq (alt parse-userinfo
+                   (values #f))
+              parse-hostport))
+       parse-reg-name
+       (values (%make-uri-server #f #f #f)))))
+
+(define parse-hostport
+  (*parser
+   (seq (match match-host)
+       (alt (seq (noise ":")
+                 (alt (map string->number (match match-digits))
+                      (values #f)))
             (values #f)))))
 
-(define url:match:host
-  (*matcher (alt url:match:hostname url:match:hostnumber)))
+(define match-host
+  (*matcher (alt match-hostname match-ipv4-address)))
 
-(define url:match:hostname
+(define match-hostname
   (let ((match-tail
         (*matcher
-         (* (alt (char-set url:char-set:alphadigit)
-                 (seq (+ #\-)
-                      (char-set url:char-set:alphadigit)))))))
+         (? (seq (* (char-set char-set:uri-alphanum-))
+                 (char-set char-set:uri-alphanum))))))
     (*matcher
-     (seq (* (seq (char-set url:char-set:alphadigit)
+     (seq (* (seq (char-set char-set:uri-alphanum)
                  match-tail
                  "."))
-         (char-set url:char-set:alpha)
-         match-tail))))
+         (char-set char-set:uri-alpha)
+         match-tail
+         (? ".")))))
 
-(define url:match:hostnumber
+(define match-ipv4-address
   (*matcher
-   (seq (+ (char-set url:char-set:digit))
-       "."
-       (+ (char-set url:char-set:digit))
-       "."
-       (+ (char-set url:char-set:digit))
-       "."
-       (+ (char-set url:char-set:digit)))))
+   (seq match-digits "." match-digits "." match-digits "." match-digits)))
+
+(define match-digits
+  (*matcher (+ (char-set char-set:uri-digit))))
 \f
-(define (url:string-encoded? string)
-  (url:substring-encoded? string 0 (string-length string)))
+;;;; Output
 
-(define url:substring-encoded?
-  (let ((matcher (*matcher (complete (* url:match:xchar)))))
-    (lambda (string start end)
-      (matcher (string->parser-buffer string start end)))))
+(define (uri->string uri)
+  (guarantee-uri uri 'URI->STRING)
+  (call-with-output-string
+    (lambda (port)
+      (%write-uri uri port))))
 
-(define (url:encode-string string)
-  (url:encode-substring string 0 (string-length string)))
-
-(define (url:encode-substring string start end)
-  (let ((n-to-encode
-        (let loop ((start start) (n-to-encode 0))
-          (let ((index
-                 (substring-find-next-char-in-set string start end
-                                                  url:char-set:escaped)))
-            (if index
-                (loop (fix:+ index 1) (fix:+ n-to-encode 1))
-                n-to-encode)))))
-    (if (fix:= 0 n-to-encode)
-       (substring string start end)
-       (let ((encoded
-              (make-string (fix:+ (fix:- end start) (fix:* 2 n-to-encode))))
-             (digits "0123456789ABCDEF"))
-         (let loop ((start start) (i 0))
-           (let ((index
-                  (substring-find-next-char-in-set string start end
-                                                   url:char-set:escaped)))
-             (if index
-                 (begin
-                   (substring-move! string start index encoded i)
-                   (let ((i (fix:+ i (fix:- index start)))
-                         (code (vector-8b-ref string index)))
-                     (string-set! encoded i #\%)
-                     (string-set! encoded
-                                  (fix:+ i 1)
-                                  (string-ref digits (fix:lsh code -4)))
-                     (string-set! encoded
-                                  (fix:+ i 2)
-                                  (string-ref digits (fix:and code #x0F)))
-                     (loop (fix:+ index 1) (fix:+ i 3))))
-                 (substring-move! string start end encoded i))))
-         encoded))))
+(define (uri->symbol uri)
+  (utf8-string->symbol (uri->string uri)))
+
+(define (write-uri uri port)
+  (guarantee-uri uri 'WRITE-URI)
+  (guarantee-port port 'WRITE-URI)
+  (%write-uri uri port))
+
+(define (%write-uri uri port)
+  (let ((scheme (uri-scheme uri))
+       (authority (uri-authority uri))
+       (path-relative? (uri-path-relative? uri))
+       (path (uri-path uri))
+       (query (uri-query uri))
+       (fragment (uri-fragment uri)))
+    (if scheme
+       (begin
+         (write scheme port)
+         (write-char #\: port)))
+    (cond ((string? path)
+          (write-escaped-substring path 0 1 char-set:uric-no-slash port)
+          (write-escaped-substring path 1 (string-length path) char-set:uric
+                                   port))
+         (authority
+          (write-string "//" port)
+          (write-authority authority port)
+          (write-abs-path path port))
+         (path-relative?
+          (write-escaped (car path) char-set:uri-rel-segment port)
+          (write-abs-path (cdr path) port))
+         (else
+          (write-abs-path path port)))
+    (if query
+       (begin
+         (write-char #\? port)
+         (write-escaped query char-set:uric port)))
+    (if fragment
+       (begin
+         (write-char #\# port)
+         (write-escaped fragment char-set:uric port)))))
+
+(define (write-authority authority port)
+  (if (uri-server? authority)
+      (begin
+       (if (uri-server-userinfo authority)
+           (begin
+             (write-escaped (uri-server-userinfo authority)
+                            char-set:uri-userinfo
+                            port)
+             (write-char #\@ port)))
+       (if (uri-server-host authority)
+           (write-string (uri-server-host authority) port))
+       (if (uri-server-port authority)
+           (begin
+             (write-char #\: port)
+             (write (uri-server-port authority) port))))
+      (write-escaped authority char-set:uri-reg-name port)))
+
+(define (write-abs-path path port)
+  (let ((write-pchar
+        (lambda (string)
+          (write-escaped string char-set:uri-pchar port))))
+    (for-each (lambda (segment)
+               (write-char #\/ port)
+               (if (string? segment)
+                   (write-pchar segment)
+                   (for-each write-pchar segment)))
+             path)))
 \f
-(define (url:decode-string string)
-  (url:decode-substring string 0 (string-length string)))
-
-(define (url:decode-substring string start end)
-  (let ((n-encoded
-        (let loop ((start start) (n-encoded 0))
-          (let ((index (substring-find-next-char string start end #\%)))
-            (if index
-                (loop (fix:+ index 1) (fix:+ n-encoded 1))
-                n-encoded))))
-       (lose
-        (lambda ()
-          (error "Malformed encoded URL string:"
-                 (substring string start end)))))
-    (if (fix:= 0 n-encoded)
-       (substring string start end)
-       (let ((decoded
-              (make-string (fix:- (fix:- end start) (fix:* 2 n-encoded)))))
-         (let loop ((start start) (i 0))
-           (let ((index (substring-find-next-char string start end #\%)))
-             (if index
-                 (begin
-                   (if (not (fix:<= (fix:+ index 3) end))
-                       (lose))
-                   (let ((k
-                          (substring->number string
-                                             (fix:+ index 1)
-                                             (fix:+ index 3)
-                                             16))
-                         (i* (fix:+ i (fix:- index start))))
-                     (if (not k)
-                         (lose))
-                     (substring-move! string start index decoded i)
-                     (vector-8b-set! decoded i* k)
-                     (loop (fix:+ index 3) (fix:+ i* 1))))
-                 (substring-move! string start end decoded i))))
-         decoded))))
\ No newline at end of file
+;;;; Escape codecs
+
+(define (component-parser-* cs)
+  (*parser
+   (map decode-component
+       (match (* (alt (char-set cs) match-escape))))))
+
+(define (component-parser-+ cs)
+  (*parser
+   (map decode-component
+       (match (+ (alt (char-set cs) match-escape))))))
+
+(define match-escape
+  (*matcher
+   (seq "%"
+       (char-set char-set:uri-hex)
+       (char-set char-set:uri-hex))))
+
+(define (decode-component string)
+  (if (string-find-next-char string #\%)
+      (call-with-output-string
+       (lambda (port)
+         (let ((end (string-length string)))
+           (let loop ((i 0))
+             (if (fix:< i end)
+                 (if (char=? (string-ref string i) #\%)
+                     (begin
+                       (write-char (integer->char
+                                    (substring->number string
+                                                       (fix:+ i 1)
+                                                       (fix:+ i 3)
+                                                       16
+                                                       #t))
+                                   port)
+                       (loop (fix:+ i 3)))
+                     (begin
+                       (write-char (string-ref string i) port)
+                       (loop (fix:+ i 1)))))))))
+      string))
+
+(define (write-escaped string unescaped port)
+  (write-escaped-substring string 0 (string-length string) unescaped port))
+
+(define (write-escaped-substring string start end unescaped port)
+  (do ((i start (fix:+ i 1)))
+      ((not (fix:< i end)))
+    (let ((char (string-ref string i)))
+      (if (char-set-member? unescaped char)
+         (write-char char port)
+         (let ((s (number->string (char->integer char) 16)))
+           (write-char #\% port)
+           (if (fix:= (string-length s) 1)
+               (write-char #\0 port))
+           (write-string s port))))))
+
+(define (complete-match matcher string #!optional start end)
+  (let ((buffer (string->parser-buffer string start end)))
+    (and (matcher buffer)
+        (not (peek-parser-buffer-char buffer)))))
+
+(define (complete-parse parser string #!optional start end)
+  (let ((buffer (string->parser-buffer string start end)))
+    (let ((v (parser buffer)))
+      (and v
+          (not (peek-parser-buffer-char buffer))
+          v))))
+
+;; backwards compatibility:
+(define (url:encode-string string)
+  (call-with-output-string
+    (lambda (port)
+      (write-escaped string url:char-set:unescaped port))))
\ No newline at end of file