#| -*-Scheme-*-
-$Id: unicode.scm,v 1.21 2004/12/06 21:27:35 cph Exp $
+$Id: unicode.scm,v 1.22 2005/05/24 04:50:43 cph Exp $
-Copyright 2001,2003,2004 Massachusetts Institute of Technology
+Copyright 2001,2003,2004,2005 Massachusetts Institute of Technology
This file is part of MIT/GNU Scheme.
(fix:= (char-bits object) 0)
(unicode-code-point? (char-code object))))
-(define (guarantee-wide-char object caller)
- (if (not (wide-char? object))
- (error:not-wide-char object caller)))
-
-(define (error:not-wide-char object caller)
- (error:wrong-type-argument object "a Unicode character" caller))
+(define-guarantee wide-char "a Unicode character")
(define (unicode-code-point? object)
(and (index-fixnum? object)
(legal-code-32? object)))
-(define-integrable (guarantee-unicode-code-point object caller)
- (if (not (unicode-code-point? object))
- (error:not-unicode-code-point object caller)))
-
-(define (error:not-unicode-code-point object caller)
- (error:wrong-type-argument object "a Unicode code point" caller))
+(define-guarantee unicode-code-point "a Unicode code point")
(define-integrable (legal-code-32? pt)
(if (fix:< pt #x10000)
(high1 #f read-only #t)
(high2 #f read-only #t))
-(define-integrable (guarantee-alphabet object caller)
- (if (not (alphabet? object))
- (error:not-alphabet object caller)))
-
-(define (error:not-alphabet object caller)
- (error:wrong-type-argument object "a Unicode alphabet" caller))
+(define-guarantee alphabet "a Unicode alphabet")
(define-integrable (make-alphabet-low)
(make-string #x100 (integer->char 0)))
(fix:< (car item) (cdr item)))
(unicode-code-point? item)))
-(define-integrable (guarantee-well-formed-code-point-list object caller)
- (if (not (well-formed-code-point-list? object))
- (error:not-well-formed-code-point-list object caller)))
-
-(define (error:not-well-formed-code-point-list object caller)
- (error:wrong-type-argument object "a Unicode code-point list" caller))
+(define-guarantee well-formed-code-point-list "a Unicode code-point list")
(define (code-points->alphabet items)
(guarantee-well-formed-code-point-list items 'CODE-POINTS->ALPHABET)
(and (fix:= (vector-8b-ref low i) 0)
(loop (fix:+ i 1))))))))
-(define-integrable (guarantee-8-bit-alphabet object caller)
- (if (not (8-bit-alphabet? object))
- (error:not-8-bit-alphabet object caller)))
-
-(define (error:not-8-bit-alphabet object caller)
- (error:wrong-type-argument object "an 8-bit alphabet" caller))
+(define-guarantee 8-bit-alphabet "an 8-bit alphabet")
(define (char-set->alphabet char-set)
(guarantee-char-set char-set 'CHAR-SET->ALPHABET)
(constructor %make-wide-string))
(contents #f read-only #t))
+(define-guarantee wide-string "a Unicode string")
+
(define (make-wide-string length #!optional char)
(%make-wide-string
(make-vector length
(vector-set! v2 j (vector-ref v1 i))))
string*))
\f
-(define-integrable (guarantee-wide-string object caller)
- (if (not (wide-string? object))
- (error:not-wide-string object caller)))
-
-(define (error:not-wide-string object caller)
- (error:wrong-type-argument object "a Unicode string" caller))
-
(define (wide-string-index? index string)
(and (index-fixnum? index)
(fix:< index (%wide-string-length string))))
(legal-code-32? (combiner (n 0) (n 1) (n 2) (n 3)))
(fix:+ start 4))
start))
+
+(define (utf32-string? object)
+ (and (string? object)
+ (utf32-string-valid? object)))
+
+(define (utf32-be-string? object)
+ (and (string? object)
+ (utf32-be-string-valid? object)))
+
+(define (utf32-le-string? object)
+ (and (string? object)
+ (utf32-le-string-valid? object)))
+
+(define-guarantee utf32-string "UTF-32 string")
+(define-guarantee utf32-be-string "UTF-32BE string")
+(define-guarantee utf32-le-string "UTF-32LE string")
\f
;;;; UTF-16 representation
(define (wide-string->utf16-le-string string #!optional start end)
(wide-string->utf-string string start end sink-utf16-le-char
'WIDE-STRING->UTF16-LE-STRING))
-\f
+
(define (utf16-string-length string #!optional start end)
(if (host-big-endian?)
(%utf16-string-length string start end "16BE" be-bytes->digit16
(encoded-string-length string start end type caller
(lambda (string start end)
(validate-utf16-char string start end combiner)))))
-
+\f
(define (utf16-string-valid? string #!optional start end)
(if (host-big-endian?)
(%utf16-string-valid? string start end be-bytes->digit16
(fix:+ (fix:+ (fix:lsh (fix:and n0 #x3FF) 10)
(fix:and n1 #x3FF))
#x10000))
+
+(define (utf16-string? object)
+ (and (string? object)
+ (utf16-string-valid? object)))
+
+(define (utf16-be-string? object)
+ (and (string? object)
+ (utf16-be-string-valid? object)))
+
+(define (utf16-le-string? object)
+ (and (string? object)
+ (utf16-le-string-valid? object)))
+
+(define-guarantee utf16-string "UTF-16 string")
+(define-guarantee utf16-be-string "UTF-16BE string")
+(define-guarantee utf16-le-string "UTF-16LE string")
\f
;;;; UTF-8 representation
(define (utf8-string-valid? string #!optional start end)
(with-substring-args string start end 'UTF8-STRING-VALID?
(encoded-string-valid? string start end validate-utf8-char)))
+
+(define (utf8-string? object)
+ (and (string? object)
+ (utf8-string-valid? object)))
+
+(define-guarantee utf8-string "UTF-8 string")
+
+(define (string->utf8-string string #!optional start end)
+ (with-substring-args string start end 'STRING->UTF8-STRING
+ (let ((string*
+ (make-string
+ (fix:+ (fix:- end start)
+ (let loop ((i start) (n 0))
+ (if (fix:< i end)
+ (loop (fix:+ i 1)
+ (if (fix:< (vector-8b-ref string i) #x80)
+ n
+ (fix:+ n 1)))
+ n))))))
+ (let loop ((i start) (i* 0))
+ (if (fix:< i end)
+ (if (fix:< (vector-8b-ref string i) #x80)
+ (begin
+ (vector-8b-set! string* i* (vector-8b-ref string i))
+ (loop (fix:+ i 1) (fix:+ i* 1)))
+ (begin
+ (vector-8b-set!
+ string*
+ i*
+ (fix:or #xC0 (fix:lsh (vector-8b-ref string i) -6)))
+ (vector-8b-set!
+ string*
+ (fix:+ i* 1)
+ (fix:or #x80 (fix:and (vector-8b-ref string i) #x3F)))
+ (loop (fix:+ i 1) (fix:+ i* 2))))))
+ string*)))
\f
(define (validate-utf8-char string start end)
#| -*-Scheme-*-
-$Id: url.scm,v 1.17 2005/05/20 04:07:54 cph Exp $
+$Id: url.scm,v 1.18 2005/05/24 04:50:50 cph Exp $
Copyright 2000,2001,2003,2004,2005 Massachusetts Institute of Technology
|#
-;;;; URL Encoding
+;;;; URI Encoding
+;;; package: (runtime uri)
+
+;;; Based on RFC 2396 <http://ietf.org/rfc/rfc2396.txt>
(declare (usual-integrations))
\f
-(define url:char-set:lowalpha)
-(define url:char-set:alpha)
-(define url:char-set:digit)
-(define url:char-set:alphadigit)
-(define url:char-set:scheme)
-(define url:char-set:safe)
-(define url:char-set:extra)
-(define url:char-set:national)
-(define url:char-set:punctuation)
-(define url:char-set:reserved)
-(define url:char-set:hex)
+(define-record-type <uri>
+ (%make-uri scheme authority path-relative? path query fragment)
+ uri?
+ (scheme uri-scheme)
+ (authority uri-authority)
+ (path-relative? uri-path-relative?)
+ (path uri-path)
+ (query uri-query)
+ (fragment uri-fragment))
+
+(define (make-uri scheme authority path-relative? path query fragment)
+ (if scheme (guarantee-uri-scheme scheme 'MAKE-URI))
+ (if authority (guarantee-uri-authority authority 'MAKE-URI))
+ (guarantee-uri-path path 'MAKE-URI)
+ (if query (guarantee-utf8-string query 'MAKE-URI))
+ (if fragment (guarantee-utf8-string fragment 'MAKE-URI))
+ (if (string? path)
+ (begin
+ (if (not scheme) (error:bad-range-argument scheme 'MAKE-URI))
+ (if authority (error:bad-range-argument authority 'MAKE-URI))
+ (if path-relative? (error:bad-range-argument path-relative? 'MAKE-URI))
+ (if query (error:bad-range-argument query 'MAKE-URI))))
+ (if (and scheme path-relative?)
+ (error:bad-range-argument path-relative? 'MAKE-URI))
+ (if (and (null? path) (not authority))
+ (error:bad-range-argument path 'MAKE-URI))
+ (%make-uri scheme authority (if path-relative? #t #f) path query fragment))
+
+(define-integrable (uri-path-absolute? uri)
+ (not (uri-path-relative? uri)))
+
+(define-integrable (uri-relative? uri)
+ (if (uri-scheme uri) #f #t))
+
+(define-integrable (uri-absolute? uri)
+ (if (uri-scheme uri) #t #f))
+
+(define-integrable (uri-opaque? uri)
+ (string? (uri-path uri)))
+
+(define-integrable (uri-heirarchical? uri)
+ (not (uri-opaque? uri)))
+
+(define (relative-uri? object)
+ (and (uri? object)
+ (uri-relative? object)))
+
+(define (absolute-uri? object)
+ (and (uri? object)
+ (uri-absolute? object)))
+
+(define (opaque-uri? object)
+ (and (uri? object)
+ (uri-opaque? object)))
+
+(define (heirarchical-uri? object)
+ (and (uri? object)
+ (uri-heirarchical? object)))
+
+(define-guarantee uri "URI")
+(define-guarantee relative-uri "relative URI")
+(define-guarantee absolute-uri "absolute URI")
+(define-guarantee opaque-uri "opaque URI")
+(define-guarantee heirarchical-uri "heirarchical URI")
+\f
+(define (uri-scheme? object)
+ (and (interned-symbol? object)
+ (complete-match match-scheme (symbol-name object))))
+
+(define (uri-path? object)
+ (or (and (utf8-string? object)
+ (fix:> (string-length object) 0))
+ (list-of-type? object
+ (lambda (elt)
+ (or (utf8-string? elt)
+ (and (pair? elt)
+ (utf8-string? (car elt))
+ (list-of-type? (cdr elt) utf8-string?)))))))
+
+(define (uri-authority? object)
+ (or (uri-server? object)
+ (uri-registry-name? object)))
+
+(define (uri-registry-name? object)
+ (and (utf8-string? object)
+ (fix:> (string-length object) 0)))
+
+(define-record-type <uri-server>
+ (%make-uri-server host port userinfo)
+ uri-server?
+ (host uri-server-host)
+ (port uri-server-port)
+ (userinfo uri-server-userinfo))
+
+(define (make-uri-server host port userinfo)
+ (if host (guarantee-uri-host host 'MAKE-URI-SERVER))
+ (if port (guarantee-uri-port port 'MAKE-URI-SERVER))
+ (if userinfo (guarantee-utf8-string userinfo 'MAKE-URI-SERVER))
+ (if (not host)
+ (begin
+ (if port (error:bad-range-argument port 'MAKE-URI-SERVER))
+ (if userinfo (error:bad-range-argument userinfo 'MAKE-URI-SERVER))))
+ (%make-uri-server host port userinfo))
+
+(define (uri-host? object)
+ (and (string? object)
+ (complete-match match-host object)))
+
+(define (uri-port? object)
+ (exact-nonnegative-integer? object))
+
+(define-guarantee uri-scheme "URI scheme")
+(define-guarantee uri-path "URI path")
+(define-guarantee uri-authority "URI authority")
+(define-guarantee uri-registry-name "URI registry name")
+(define-guarantee uri-server "URI server")
+(define-guarantee uri-host "URI host")
+(define-guarantee uri-port "URI port")
+\f
+(define char-set:uri-alpha)
+(define char-set:uri-digit)
+(define char-set:uri-alphanum)
+(define char-set:uri-alphanum-)
+(define char-set:uri-hex)
+(define char-set:uri-scheme)
+(define char-set:uric)
+(define char-set:uric-no-slash)
+(define char-set:uri-reg-name)
+(define char-set:uri-userinfo)
+(define char-set:uri-rel-segment)
+(define char-set:uri-pchar)
+
+(define parse-fragment)
+(define parse-query)
+(define parse-reg-name)
+(define parse-userinfo)
+(define parse-rel-segment)
+(define parse-pchar)
+
(define url:char-set:unreserved)
(define url:char-set:unescaped)
-(define url:char-set:escaped)
(define (initialize-package!)
- (set! url:char-set:lowalpha (string->char-set "abcdefghijklmnopqrstuvwxyz"))
- (set! url:char-set:alpha
- (char-set-union url:char-set:lowalpha
- (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")))
- (set! url:char-set:digit (string->char-set "0123456789"))
- (set! url:char-set:alphadigit
- (char-set-union url:char-set:alpha url:char-set:digit))
- (set! url:char-set:scheme
- (char-set-union url:char-set:alphadigit (string->char-set "+-.")))
- (set! url:char-set:safe (string->char-set "$-_.+"))
- (set! url:char-set:extra (string->char-set "!*'(),"))
- (set! url:char-set:national (string->char-set "{}|\\^~[]`"))
- (set! url:char-set:punctuation (string->char-set "<>#%\""))
- (set! url:char-set:reserved (string->char-set ";/?:@&="))
- (set! url:char-set:hex (string->char-set "0123456789abcdefABCDEF"))
+ (set! char-set:uri-alpha
+ (string->char-set
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
+ (set! char-set:uri-digit (string->char-set "0123456789"))
+ (set! char-set:uri-alphanum
+ (char-set-union char-set:uri-alpha char-set:uri-digit))
+ (set! char-set:uri-alphanum-
+ (char-set-union char-set:uri-alphanum (char-set #\-)))
+ (set! char-set:uri-hex (string->char-set "0123456789abcdefABCDEF"))
+ (set! char-set:uri-scheme
+ (char-set-union char-set:uri-alphanum (string->char-set "+-.")))
+ (set! char-set:uric
+ (char-set-union char-set:uri-alphanum
+ (string->char-set "!'()*-._~") ;mark
+ (string->char-set "$&+,/:;=?@") ;reserved
+ ))
+ (let ((component-chars
+ (lambda (free)
+ (char-set-difference char-set:uric (string->char-set free)))))
+ (set! char-set:uric-no-slash (component-chars "/"))
+ (set! char-set:uri-reg-name (component-chars "/?"))
+ (set! char-set:uri-userinfo (component-chars "/?@"))
+ (set! char-set:uri-rel-segment (component-chars "/:?"))
+ (set! char-set:uri-pchar (component-chars "/;?")))
+
+ (set! parse-fragment (component-parser-* char-set:uric))
+ (set! parse-query parse-fragment)
+ (set! parse-reg-name (component-parser-+ char-set:uri-reg-name))
+ (set! parse-userinfo (component-parser-* char-set:uri-userinfo))
+ (set! parse-rel-segment (component-parser-+ char-set:uri-rel-segment))
+ (set! parse-pchar (component-parser-* char-set:uri-pchar))
+
+ ;; backwards compatibility:
(set! url:char-set:unreserved
- (char-set-union url:char-set:alphadigit
- url:char-set:safe
- url:char-set:extra))
+ (char-set-union char-set:uri-alphanum
+ (string->char-set "!$'()*+,-._")))
(set! url:char-set:unescaped
(char-set-union url:char-set:unreserved
- url:char-set:reserved))
- (set! url:char-set:escaped
- (char-set-invert url:char-set:unescaped))
+ (string->char-set ";/?:@&=")))
unspecific)
+\f
+;;;; Parser
-(define url:match:uchar
- (*matcher
- (alt (char-set url:char-set:unreserved)
- url:match:escape)))
+(define (string->uri string #!optional start end)
+ (let ((v (complete-parse parse-uri string start end)))
+ (and v
+ (vector-ref v 0))))
-(define url:match:xchar
- (*matcher
- (alt (char-set url:char-set:unescaped)
- url:match:escape)))
+(define (->uri object)
+ (cond ((uri? object) object)
+ ((string? object) (string->uri object))
+ ((symbol? object) (string->uri (symbol-name object)))
+ (else (error:not-uri object '->URI))))
+
+(define parse-uri
+ (*parser
+ (top-level
+ (seq (alt parse-absolute-uri
+ parse-relative-uri
+ (values #f))
+ (alt (seq "#" parse-fragment)
+ (values #f))))))
+
+(define parse-absolute-uri
+ (*parser
+ (alt (encapsulate (lambda (v)
+ (let ((path (vector-ref v 1)))
+ (%make-uri (vector-ref v 0)
+ (vector-ref path 0)
+ (vector-ref path 1)
+ (vector-ref path 2)
+ (vector-ref v 2)
+ (vector-ref v 3))))
+ (seq parse-scheme
+ ":"
+ (alt parse-net-path parse-abs-path)
+ (alt (seq "?" parse-query)
+ (values #f))
+ (alt (seq "#" parse-fragment)
+ (values #f))))
+ (encapsulate (lambda (v)
+ (%make-uri (vector-ref v 0)
+ #f
+ #f
+ (vector-ref v 1)
+ #f
+ (vector-ref v 2)))
+ (seq parse-scheme
+ ":"
+ (match (seq (char-set char-set:uric-no-slash)
+ (* (char-set char-set:uric))))
+ (alt (seq "#" parse-fragment)
+ (values #f)))))))
-(define url:match:escape
+(define parse-scheme
+ (*parser
+ (map intern (match match-scheme))))
+
+(define match-scheme
(*matcher
- (seq "%"
- (char-set url:char-set:hex)
- (char-set url:char-set:hex))))
+ (seq (char-set char-set:uri-alpha)
+ (* (char-set char-set:uri-scheme)))))
+
+(define parse-relative-uri
+ (*parser
+ (encapsulate (lambda (v)
+ (let ((path (vector-ref v 0)))
+ (%make-uri #f
+ (vector-ref path 0)
+ (vector-ref path 1)
+ (vector-ref path 2)
+ (vector-ref v 1)
+ (vector-ref v 2))))
+ (seq (alt parse-net-path
+ parse-abs-path
+ parse-rel-path)
+ (alt (seq "?" parse-query)
+ (values #f))
+ (alt (seq "#" parse-fragment)
+ (values #f))))))
\f
-(define url:parse:scheme
+(define parse-net-path
+ (*parser
+ (encapsulate (lambda (v) (vector (vector-ref v 0) #f (vector-ref v 1)))
+ (seq "//"
+ parse-authority
+ (alt (encapsulate vector->list
+ (* (seq "/" parse-segment)))
+ (values '()))))))
+
+(define parse-abs-path
+ (*parser
+ (map (lambda (p) (vector #f #f p))
+ (encapsulate vector->list
+ (* (seq "/" parse-segment))))))
+
+(define parse-rel-path
(*parser
- (map intern
- (match (+ url:char-set:scheme)))))
+ (map (lambda (p) (vector #f #t p))
+ (encapsulate vector->list
+ (seq parse-rel-segment
+ (* (seq "/" parse-segment)))))))
-(define url:parse:hostport
+(define parse-segment
(*parser
- (seq (match url:match:host)
- (alt (map string->number
- (seq (noise ":")
- (match (+ (char-set url:char-set:digit)))))
+ (encapsulate (lambda (v)
+ (if (fix:> (vector-length v) 1)
+ (vector->list v)
+ (vector-ref v 0)))
+ (seq parse-pchar
+ (* (seq ";" parse-pchar))))))
+
+(define parse-authority
+ (*parser
+ (alt (encapsulate (lambda (v)
+ (%make-uri-server (vector-ref v 1)
+ (vector-ref v 2)
+ (vector-ref v 0)))
+ (seq (alt parse-userinfo
+ (values #f))
+ parse-hostport))
+ parse-reg-name
+ (values (%make-uri-server #f #f #f)))))
+
+(define parse-hostport
+ (*parser
+ (seq (match match-host)
+ (alt (seq (noise ":")
+ (alt (map string->number (match match-digits))
+ (values #f)))
(values #f)))))
-(define url:match:host
- (*matcher (alt url:match:hostname url:match:hostnumber)))
+(define match-host
+ (*matcher (alt match-hostname match-ipv4-address)))
-(define url:match:hostname
+(define match-hostname
(let ((match-tail
(*matcher
- (* (alt (char-set url:char-set:alphadigit)
- (seq (+ #\-)
- (char-set url:char-set:alphadigit)))))))
+ (? (seq (* (char-set char-set:uri-alphanum-))
+ (char-set char-set:uri-alphanum))))))
(*matcher
- (seq (* (seq (char-set url:char-set:alphadigit)
+ (seq (* (seq (char-set char-set:uri-alphanum)
match-tail
"."))
- (char-set url:char-set:alpha)
- match-tail))))
+ (char-set char-set:uri-alpha)
+ match-tail
+ (? ".")))))
-(define url:match:hostnumber
+(define match-ipv4-address
(*matcher
- (seq (+ (char-set url:char-set:digit))
- "."
- (+ (char-set url:char-set:digit))
- "."
- (+ (char-set url:char-set:digit))
- "."
- (+ (char-set url:char-set:digit)))))
+ (seq match-digits "." match-digits "." match-digits "." match-digits)))
+
+(define match-digits
+ (*matcher (+ (char-set char-set:uri-digit))))
\f
-(define (url:string-encoded? string)
- (url:substring-encoded? string 0 (string-length string)))
+;;;; Output
-(define url:substring-encoded?
- (let ((matcher (*matcher (complete (* url:match:xchar)))))
- (lambda (string start end)
- (matcher (string->parser-buffer string start end)))))
+(define (uri->string uri)
+ (guarantee-uri uri 'URI->STRING)
+ (call-with-output-string
+ (lambda (port)
+ (%write-uri uri port))))
-(define (url:encode-string string)
- (url:encode-substring string 0 (string-length string)))
-
-(define (url:encode-substring string start end)
- (let ((n-to-encode
- (let loop ((start start) (n-to-encode 0))
- (let ((index
- (substring-find-next-char-in-set string start end
- url:char-set:escaped)))
- (if index
- (loop (fix:+ index 1) (fix:+ n-to-encode 1))
- n-to-encode)))))
- (if (fix:= 0 n-to-encode)
- (substring string start end)
- (let ((encoded
- (make-string (fix:+ (fix:- end start) (fix:* 2 n-to-encode))))
- (digits "0123456789ABCDEF"))
- (let loop ((start start) (i 0))
- (let ((index
- (substring-find-next-char-in-set string start end
- url:char-set:escaped)))
- (if index
- (begin
- (substring-move! string start index encoded i)
- (let ((i (fix:+ i (fix:- index start)))
- (code (vector-8b-ref string index)))
- (string-set! encoded i #\%)
- (string-set! encoded
- (fix:+ i 1)
- (string-ref digits (fix:lsh code -4)))
- (string-set! encoded
- (fix:+ i 2)
- (string-ref digits (fix:and code #x0F)))
- (loop (fix:+ index 1) (fix:+ i 3))))
- (substring-move! string start end encoded i))))
- encoded))))
+(define (uri->symbol uri)
+ (utf8-string->symbol (uri->string uri)))
+
+(define (write-uri uri port)
+ (guarantee-uri uri 'WRITE-URI)
+ (guarantee-port port 'WRITE-URI)
+ (%write-uri uri port))
+
+(define (%write-uri uri port)
+ (let ((scheme (uri-scheme uri))
+ (authority (uri-authority uri))
+ (path-relative? (uri-path-relative? uri))
+ (path (uri-path uri))
+ (query (uri-query uri))
+ (fragment (uri-fragment uri)))
+ (if scheme
+ (begin
+ (write scheme port)
+ (write-char #\: port)))
+ (cond ((string? path)
+ (write-escaped-substring path 0 1 char-set:uric-no-slash port)
+ (write-escaped-substring path 1 (string-length path) char-set:uric
+ port))
+ (authority
+ (write-string "//" port)
+ (write-authority authority port)
+ (write-abs-path path port))
+ (path-relative?
+ (write-escaped (car path) char-set:uri-rel-segment port)
+ (write-abs-path (cdr path) port))
+ (else
+ (write-abs-path path port)))
+ (if query
+ (begin
+ (write-char #\? port)
+ (write-escaped query char-set:uric port)))
+ (if fragment
+ (begin
+ (write-char #\# port)
+ (write-escaped fragment char-set:uric port)))))
+
+(define (write-authority authority port)
+ (if (uri-server? authority)
+ (begin
+ (if (uri-server-userinfo authority)
+ (begin
+ (write-escaped (uri-server-userinfo authority)
+ char-set:uri-userinfo
+ port)
+ (write-char #\@ port)))
+ (if (uri-server-host authority)
+ (write-string (uri-server-host authority) port))
+ (if (uri-server-port authority)
+ (begin
+ (write-char #\: port)
+ (write (uri-server-port authority) port))))
+ (write-escaped authority char-set:uri-reg-name port)))
+
+(define (write-abs-path path port)
+ (let ((write-pchar
+ (lambda (string)
+ (write-escaped string char-set:uri-pchar port))))
+ (for-each (lambda (segment)
+ (write-char #\/ port)
+ (if (string? segment)
+ (write-pchar segment)
+ (for-each write-pchar segment)))
+ path)))
\f
-(define (url:decode-string string)
- (url:decode-substring string 0 (string-length string)))
-
-(define (url:decode-substring string start end)
- (let ((n-encoded
- (let loop ((start start) (n-encoded 0))
- (let ((index (substring-find-next-char string start end #\%)))
- (if index
- (loop (fix:+ index 1) (fix:+ n-encoded 1))
- n-encoded))))
- (lose
- (lambda ()
- (error "Malformed encoded URL string:"
- (substring string start end)))))
- (if (fix:= 0 n-encoded)
- (substring string start end)
- (let ((decoded
- (make-string (fix:- (fix:- end start) (fix:* 2 n-encoded)))))
- (let loop ((start start) (i 0))
- (let ((index (substring-find-next-char string start end #\%)))
- (if index
- (begin
- (if (not (fix:<= (fix:+ index 3) end))
- (lose))
- (let ((k
- (substring->number string
- (fix:+ index 1)
- (fix:+ index 3)
- 16))
- (i* (fix:+ i (fix:- index start))))
- (if (not k)
- (lose))
- (substring-move! string start index decoded i)
- (vector-8b-set! decoded i* k)
- (loop (fix:+ index 3) (fix:+ i* 1))))
- (substring-move! string start end decoded i))))
- decoded))))
\ No newline at end of file
+;;;; Escape codecs
+
+(define (component-parser-* cs)
+ (*parser
+ (map decode-component
+ (match (* (alt (char-set cs) match-escape))))))
+
+(define (component-parser-+ cs)
+ (*parser
+ (map decode-component
+ (match (+ (alt (char-set cs) match-escape))))))
+
+(define match-escape
+ (*matcher
+ (seq "%"
+ (char-set char-set:uri-hex)
+ (char-set char-set:uri-hex))))
+
+(define (decode-component string)
+ (if (string-find-next-char string #\%)
+ (call-with-output-string
+ (lambda (port)
+ (let ((end (string-length string)))
+ (let loop ((i 0))
+ (if (fix:< i end)
+ (if (char=? (string-ref string i) #\%)
+ (begin
+ (write-char (integer->char
+ (substring->number string
+ (fix:+ i 1)
+ (fix:+ i 3)
+ 16
+ #t))
+ port)
+ (loop (fix:+ i 3)))
+ (begin
+ (write-char (string-ref string i) port)
+ (loop (fix:+ i 1)))))))))
+ string))
+
+(define (write-escaped string unescaped port)
+ (write-escaped-substring string 0 (string-length string) unescaped port))
+
+(define (write-escaped-substring string start end unescaped port)
+ (do ((i start (fix:+ i 1)))
+ ((not (fix:< i end)))
+ (let ((char (string-ref string i)))
+ (if (char-set-member? unescaped char)
+ (write-char char port)
+ (let ((s (number->string (char->integer char) 16)))
+ (write-char #\% port)
+ (if (fix:= (string-length s) 1)
+ (write-char #\0 port))
+ (write-string s port))))))
+
+(define (complete-match matcher string #!optional start end)
+ (let ((buffer (string->parser-buffer string start end)))
+ (and (matcher buffer)
+ (not (peek-parser-buffer-char buffer)))))
+
+(define (complete-parse parser string #!optional start end)
+ (let ((buffer (string->parser-buffer string start end)))
+ (let ((v (parser buffer)))
+ (and v
+ (not (peek-parser-buffer-char buffer))
+ v))))
+
+;; backwards compatibility:
+(define (url:encode-string string)
+ (call-with-output-string
+ (lambda (port)
+ (write-escaped string url:char-set:unescaped port))))
\ No newline at end of file