;;;; Backwards compatibility
(define (string->char-set string)
- (scalar-values->char-set (map char->integer (ustring->list string))))
+ (scalar-values->char-set (map char->integer (string->list string))))
+;; Returns ASCII string:
(define (char-set->string char-set)
- (list->ustring (char-set-members char-set)))
+ (list->string (char-set-members char-set)))
+;; Returns only ASCII members:
(define (char-set-members char-set)
(guarantee-char-set char-set 'CHAR-SET-MEMBERS)
(let ((low (%char-set-low char-set)))
(let loop ((code 0))
- (if (fix:< code #x100)
+ (if (fix:< code #x80)
(if (%low-ref low code)
(cons (integer->char code)
(loop (fix:+ code 1)))
((textual-port-operation/peek-char port) port))
(define (input-port/read-string! port string)
- (input-port/read-substring! port string 0 (xstring-length string)))
+ (input-port/read-substring! port string 0 (ustring-length string)))
(define (input-port/read-substring! port string start end)
(if (< start end)
(guarantee index-fixnum? k 'read-string)
(let ((port (optional-input-port port 'read-string)))
(if (fix:> k 0)
- (let ((string (make-string k)))
+ (let ((string (make-ustring k)))
(let ((n (input-port/read-string! port string)))
(cond ((not n) n)
- ((fix:> n 0) (if (fix:< n k) (substring string 0 n) string))
+ ((fix:> n 0) (if (fix:< n k) (ustring-head string n) string))
(else (eof-object)))))
- (make-string 0))))
+ (make-ustring 0))))
\f
(define (read #!optional port environment)
(parse-object (optional-input-port port 'READ) environment))
(let ((port (optional-input-port port 'read-string!))
(end
(if (default-object? end)
- (xstring-length string)
+ (ustring-length string)
(begin
(guarantee index-fixnum? end 'read-string!)
- (if (not (fix:<= end (xstring-length string)))
+ (if (not (fix:<= end (ustring-length string)))
(error:bad-range-argument end 'read-string!))
end))))
(let ((start
(define-integrable keyword-prefix "#[keyword]")
(define (string->keyword string)
- (guarantee-string string 'STRING->KEYWORD)
- (string->symbol (string-append keyword-prefix string)))
+ (guarantee ustring? string 'STRING->KEYWORD)
+ (string->symbol (ustring-append keyword-prefix string)))
(define (keyword? object)
(and (interned-symbol? object)
- (string-prefix? keyword-prefix (symbol-name object))))
+ (ustring-prefix? keyword-prefix (symbol->string object))))
(define-guarantee keyword "keyword")
(define (keyword->string keyword)
(guarantee-keyword keyword 'KEYWORD->STRING)
- (string-tail (symbol-name keyword) (string-length keyword-prefix)))
\ No newline at end of file
+ (ustring-tail (symbol->string keyword) (ustring-length keyword-prefix)))
\ No newline at end of file
(declare (usual-integrations))
\f
-(define (string->number string #!optional radix error?)
- (if (not (string? string))
- (error:wrong-type-argument string "string" 'STRING->NUMBER))
- (parse-number string 0 (string-length string) radix error? 'STRING->NUMBER))
-
-(define (substring->number string start end #!optional radix error?)
- (if (not (string? string))
- (error:wrong-type-argument string "string" 'SUBSTRING->NUMBER))
- (if (not (index-fixnum? start))
- (error:wrong-type-argument start "string index" 'SUBSTRING->NUMBER))
- (if (not (index-fixnum? end))
- (error:wrong-type-argument end "string index" 'SUBSTRING->NUMBER))
- (if (not (fix:<= end (string-length string)))
- (error:bad-range-argument end 'SUBSTRING->NUMBER))
- (if (not (fix:<= start end))
- (error:bad-range-argument start 'SUBSTRING->NUMBER))
- (parse-number string start end radix error? 'SUBSTRING->NUMBER))
-
-(define (parse-number string start end radix error? caller)
- (let ((z
- (parse-number-1 string start end
- (if (default-object? radix) #f radix)
- caller)))
+(define (string->number string #!optional radix error? start end)
+ (let* ((caller 'string->number)
+ (end (fix:end-index end (ustring-length string) caller))
+ (start (fix:start-index start end caller))
+ (z
+ (parse-number string start end
+ (if (default-object? radix) #f radix)
+ caller)))
(if (and (not z) (if (default-object? error?) #f error?))
(error:bad-range-argument string caller))
z))
-(define (parse-number-1 string start end default-radix name)
+(define (substring->number string start end #!optional radix error?)
+ (string->number string radix error? start end))
+
+(define (parse-number string start end default-radix name)
(if (not (or (eq? #f default-radix) (eq? 2 default-radix)
(eq? 8 default-radix) (eq? 10 default-radix)
(eq? 16 default-radix)))
(error:bad-range-argument default-radix name))
(let loop ((start start) (exactness #f) (radix #f))
(and (fix:< start end)
- (if (char=? #\# (string-ref string start))
+ (if (char=? #\# (ustring-ref string start))
(let ((start (fix:+ start 1)))
(and (fix:< start end)
- (let ((char (string-ref string start))
+ (let ((char (ustring-ref string start))
(start (fix:+ start 1)))
(let ((do-radix
(lambda (r)
(define (parse-top-level string start end exactness radix)
(and (fix:< start end)
- (let ((char (string-ref string start))
+ (let ((char (ustring-ref string start))
(start (fix:+ start 1)))
(cond ((sign? char)
(find-leader string start end
(define (find-leader string start end exactness radix sign)
;; State: leading sign has been seen.
(and (fix:< start end)
- (let ((char (string-ref string start))
+ (let ((char (ustring-ref string start))
(start (fix:+ start 1)))
(cond ((char->digit char radix)
=> (lambda (digit)
(parse-digits string start end integer exactness radix
(lambda (start integer exactness sharp?)
(if (fix:< start end)
- (let ((char (string-ref string start))
+ (let ((char (ustring-ref string start))
(start+1 (fix:+ start 1)))
(cond ((char=? #\/ char)
(parse-denominator-1 string start+1 end
(define (parse-digits string start end integer exactness radix k)
(let loop ((start start) (integer integer))
(if (fix:< start end)
- (let ((char (string-ref string start)))
+ (let ((char (ustring-ref string start)))
(cond ((char->digit char radix)
=> (lambda (digit)
(loop (fix:+ start 1)
(do ((start (fix:+ start 1) (fix:+ start 1))
(integer (* integer radix) (* integer radix)))
((not (and (fix:< start end)
- (char=? #\# (string-ref string start))))
+ (char=? #\# (ustring-ref string start))))
(k start integer (or exactness 'IMPLICIT-INEXACT) #t))))
(else
(k start integer exactness #f))))
(define (parse-decimal-1 string start end exactness sign)
;; State: radix is 10, leading dot seen.
(and (fix:< start end)
- (let ((digit (char->digit (string-ref string start) 10))
+ (let ((digit (char->digit (ustring-ref string start) 10))
(start (fix:+ start 1)))
(and digit
(parse-decimal-2 string start end digit -1 exactness sign)))))
;; State: radix is 10, dot seen.
(let loop ((start start) (integer integer) (exponent exponent))
(if (fix:< start end)
- (let ((char (string-ref string start))
+ (let ((char (ustring-ref string start))
(start+1 (fix:+ start 1)))
(cond ((char->digit char 10)
=> (lambda (digit)
;; State: radix is 10, dot and # seen.
(let loop ((start start))
(if (fix:< start end)
- (let ((char (string-ref string start))
+ (let ((char (ustring-ref string start))
(start+1 (fix:+ start 1)))
(if (char=? #\# char)
(loop start+1)
(finish-real integer exponent exactness sign))))
(define (parse-decimal-4 string start end integer exponent exactness sign)
- (if (exponent-marker? (string-ref string start))
+ (if (exponent-marker? (ustring-ref string start))
(parse-exponent-1 string (fix:+ start 1) end
integer exponent exactness sign)
(parse-decimal-5 string start end integer exponent exactness sign)))
;; State: radix is 10, exponent seen.
(define (get-digits start esign)
(and (fix:< start end)
- (let ((digit (char->digit (string-ref string start) 10)))
+ (let ((digit (char->digit (ustring-ref string start) 10)))
(and digit
(let loop ((start (fix:+ start 1)) (eint digit))
(if (fix:< start end)
(let ((digit
- (char->digit (string-ref string start) 10)))
+ (char->digit (ustring-ref string start) 10)))
(if digit
(loop (fix:+ start 1)
(+ (* eint 10) digit))
integer exponent exactness sign))))
(and (fix:< start end)
- (let ((esign (string-ref string start)))
+ (let ((esign (ustring-ref string start)))
(if (sign? esign)
(get-digits (fix:+ start 1) esign)
(get-digits start #f)))))
\f
(define (parse-complex string start end real exactness radix sign)
(if (fix:< start end)
- (let ((char (string-ref string start))
+ (let ((char (ustring-ref string start))
(start+1 (fix:+ start 1))
(exactness (if (eq? 'IMPLICIT-INEXACT exactness) #f exactness)))
(cond ((sign? char)
((textual-port-operation/write-char port) port char))
(define (output-port/write-string port string)
- (output-port/write-substring port string 0 (xstring-length string)))
+ (output-port/write-substring port string 0 (ustring-length string)))
(define (output-port/write-substring port string start end)
((textual-port-operation/write-substring port) port string start end))
(let ((port (optional-output-port port 'WRITE-STRING))
(end
(if (default-object? end)
- (xstring-length string)
+ (ustring-length string)
(begin
(guarantee index-fixnum? end 'write-string)
- (if (not (fix:<= end (xstring-length string)))
+ (if (not (fix:<= end (ustring-length string)))
(error:bad-range-argument end 'write-string))
end))))
(let ((start
package))))
(define-integrable package-name-tag
- ((ucode-primitive string->symbol) "#[(package)package-name-tag]"))
+ '|#[(package)package-name-tag]|)
(define (find-package name #!optional error?)
(let package-loop ((packages *packages*))
(let ((p (->pathname pathname)))
(pathname-new-type
(pathname-new-name p
- (string-append
+ (ustring-append
(or (pathname-name p)
;; Interpret dirname/ as dirname/dirname-OS.pkd.
(let ((dir (pathname-directory p)))
(if (pair? dir)
(let ((name (last dir)))
- (if (string? name)
+ (if (ustring? name)
name
""))
"")))
(and (pair? clause)
(or (eq? (car clause) 'ELSE)
(vector-of-type? (car clause) symbol?))
- (vector-of-type? (cdr clause) string?)))))
- (vector-of-type? file-case string?))))
+ (vector-of-type? (cdr clause) ustring?)))))
+ (vector-of-type? file-case ustring?))))
(vector? (load-description/initializations object))
(vector? (load-description/finalizations object))))
\f
;;; that reads from an input port.
(define (string->parser-buffer string #!optional start end)
- (if (string? string)
- (let ((string (string->wide-string string start end)))
- (make-parser-buffer string 0 (wide-string-length string) 0 0 #f #t 0))
- (begin
- (guarantee wide-string? string 'STRING->PARSER-BUFFER)
- (let* ((end
- (if (or (default-object? end) (not end))
- (wide-string-length string)
- (guarantee-substring-end-index end
- (wide-string-length string)
- 'STRING->PARSER-BUFFER)))
- (start
- (if (or (default-object? start) (not start))
- 0
- (guarantee-substring-start-index start end
- 'STRING->PARSER-BUFFER))))
- (make-parser-buffer string start end 0 0 #f #t 0)))))
-
-(define (utf8-string->parser-buffer string #!optional start end)
- (let ((ws (utf8-string->wide-string string start end)))
- (make-parser-buffer ws 0 (wide-string-length ws) 0 0 #f #t 0)))
+ (let* ((caller 'string->parser-buffer)
+ (end (fix:end-index end (ustring-length string) caller))
+ (start (fix:start-index start end caller)))
+ (make-parser-buffer string start end 0 0 #f #t 0)))
(define (textual-input-port->parser-buffer port #!optional prefix)
(guarantee textual-input-port? port 'textual-input-port->parser-buffer)
(if (or (default-object? prefix)
(not prefix)
- (and (wide-string? prefix)
- (zero? (wide-string-length prefix))))
- (make-parser-buffer (make-wide-string min-length) 0 0 0 0 port #f 0)
- (begin
- (guarantee wide-string? prefix 'textual-input-port->parser-buffer)
- (let ((n (wide-string-length prefix)))
- (make-parser-buffer (%grow-buffer prefix n (max min-length n))
- 0 n 0 0 port #f 0)))))
+ (and (ustring? prefix)
+ (fix:= 0 (ustring-length prefix))))
+ (make-parser-buffer (make-ustring min-length) 0 0 0 0 port #f 0)
+ (let ((n (ustring-length prefix)))
+ (make-parser-buffer (%grow-buffer prefix n (fix:max min-length n))
+ 0 n 0 0 port #f 0))))
(define-integrable min-length 256)
\f
(define (*match-string matcher string #!optional start end)
(complete-*match matcher (string->parser-buffer string start end)))
-(define (*match-utf8-string matcher string #!optional start end)
- (complete-*match matcher (utf8-string->parser-buffer string start end)))
-
(define (*match-symbol matcher symbol)
- (*match-utf8-string matcher (symbol-name symbol)))
+ (*match-string matcher (symbol->string symbol)))
(define (complete-*parse parser buffer)
(let ((v (parser buffer)))
(define (*parse-string parser string #!optional start end)
(complete-*parse parser (string->parser-buffer string start end)))
-(define (*parse-utf8-string parser string #!optional start end)
- (complete-*parse parser (utf8-string->parser-buffer string start end)))
-
(define (*parse-symbol parser symbol)
- (*parse-utf8-string parser (symbol-name symbol)))
+ (*parse-string parser (symbol->string symbol)))
\f
(define-structure parser-buffer-pointer
(index #f read-only #t)
(set-parser-buffer-line! buffer (parser-buffer-pointer-line p)))
(define (get-parser-buffer-tail buffer p)
- (call-with-parser-buffer-tail buffer p string->utf8-string))
+ (call-with-parser-buffer-tail buffer p ustring-copy))
(define (call-with-parser-buffer-tail buffer p procedure)
;; P must be a buffer pointer previously returned by
;; characters available, return #F and leave the position unchanged.
(and (guarantee-buffer-chars buffer 1)
(let ((char
- (wide-string-ref (parser-buffer-string buffer)
- (parser-buffer-index buffer))))
+ (ustring-ref (parser-buffer-string buffer)
+ (parser-buffer-index buffer))))
(increment-buffer-index! buffer char)
char)))
;; current position. If there is a character available, return it,
;; otherwise return #F. The position is unaffected in either case.
(and (guarantee-buffer-chars buffer 1)
- (wide-string-ref (parser-buffer-string buffer)
- (parser-buffer-index buffer))))
+ (ustring-ref (parser-buffer-string buffer)
+ (parser-buffer-index buffer))))
(define (parser-buffer-ref buffer index)
(if (not (index-fixnum? index))
(error:wrong-type-argument index "index" 'PARSER-BUFFER-REF))
(and (guarantee-buffer-chars buffer (fix:+ index 1))
- (wide-string-ref (parser-buffer-string buffer)
- (fix:+ (parser-buffer-index buffer) index))))
+ (ustring-ref (parser-buffer-string buffer)
+ (fix:+ (parser-buffer-index buffer) index))))
(define (match-parser-buffer-char buffer char)
(match-char buffer char char=?))
(define-integrable (match-char buffer reference compare)
(and (guarantee-buffer-chars buffer 1)
(let ((char
- (wide-string-ref (parser-buffer-string buffer)
- (parser-buffer-index buffer))))
+ (ustring-ref (parser-buffer-string buffer)
+ (parser-buffer-index buffer))))
(and (compare char reference)
(begin
(increment-buffer-index! buffer char)
(define-integrable (match-char-no-advance buffer reference compare)
(and (guarantee-buffer-chars buffer 1)
- (compare (wide-string-ref (parser-buffer-string buffer)
- (parser-buffer-index buffer))
+ (compare (ustring-ref (parser-buffer-string buffer)
+ (parser-buffer-index buffer))
reference)))
(define-integrable (match-char-not buffer reference compare)
(match-string buffer string match-substring-loop-na char-ci=?))
(define-integrable (match-string buffer string loop compare)
- (cond ((wide-string? string)
- (loop buffer
- string 0 (wide-string-length string)
- compare wide-string-ref))
- ((string? string)
- (loop buffer
- string 0 (string-length string)
- compare string-ref))
- (else
- (error:wrong-type-argument string "string" #f))))
+ (loop buffer string 0 (ustring-length string) compare))
(define (match-parser-buffer-substring buffer string start end)
(match-substring buffer string start end match-substring-loop char=?))
(match-substring buffer string start end match-substring-loop-na char-ci=?))
(define-integrable (match-substring buffer string start end loop compare)
- (cond ((wide-string? string)
- (loop buffer
- string start end
- compare wide-string-ref))
- ((string? string)
- (loop buffer
- string start end
- compare string-ref))
- (else
- (error:wrong-type-argument string "string" #f))))
-
+ (guarantee ustring? string)
+ (loop buffer string start end compare))
\f
-(define-integrable (match-substring-loop buffer string start end
- compare extract)
+(define-integrable (match-substring-loop buffer string start end compare)
(and (guarantee-buffer-chars buffer (fix:- end start))
(let ((bs (parser-buffer-string buffer)))
(let loop
(bi (parser-buffer-index buffer))
(bl (parser-buffer-line buffer)))
(if (fix:< i end)
- (and (compare (extract string i) (wide-string-ref bs bi))
+ (and (compare (ustring-ref string i) (ustring-ref bs bi))
(loop (fix:+ i 1)
(fix:+ bi 1)
- (if (char=? (wide-string-ref bs bi) #\newline)
+ (if (char=? (ustring-ref bs bi) #\newline)
(fix:+ bl 1)
bl)))
(begin
(set-parser-buffer-line! buffer bl)
#t))))))
-(define-integrable (match-substring-loop-na buffer string start end
- compare extract)
+(define-integrable (match-substring-loop-na buffer string start end compare)
(and (guarantee-buffer-chars buffer (fix:- end start))
(let ((bs (parser-buffer-string buffer)))
(let loop ((i start) (bi (parser-buffer-index buffer)))
(if (fix:< i end)
- (and (compare (extract string i) (wide-string-ref bs bi))
+ (and (compare (ustring-ref string i) (ustring-ref bs bi))
(loop (fix:+ i 1) (fix:+ bi 1)))
#t)))))
(let loop ((i i) (n (parser-buffer-line buffer)))
(if (fix:< i j)
(loop (fix:+ i 1)
- (if (char=? (wide-string-ref s i) #\newline)
+ (if (char=? (ustring-ref s i) #\newline)
(fix:+ n 1)
n))
(set-parser-buffer-line! buffer n)))
(if (fix:> index 0)
(let* ((end* (fix:- end index))
(string*
- (let ((n (wide-string-length string)))
+ (let ((n (ustring-length string)))
(if (and (fix:> n min-length)
(fix:<= end* (fix:quotient n 4)))
- (make-wide-string (fix:quotient n 2))
+ (make-ustring (fix:quotient n 2))
string))))
(without-interruption
(lambda ()
- (do ((i index (fix:+ i 1))
- (j 0 (fix:+ j 1)))
- ((not (fix:< i end)))
- (wide-string-set! string* j (wide-string-ref string i)))
+ (ustring-copy! string* 0 string index end)
(set-parser-buffer-string! buffer string*)
(set-parser-buffer-index! buffer 0)
(set-parser-buffer-end! buffer end*)
(set-parser-buffer-base-offset!
buffer
- (+ (parser-buffer-base-offset buffer) index)))))))
+ (fix:+ (parser-buffer-base-offset buffer) index)))))))
(set-parser-buffer-start! buffer (parser-buffer-index buffer))))
\f
(define-integrable (guarantee-buffer-chars buffer n)
;; Don't read more characters than are needed. The XML parser
;; depends on this when doing its character-code detection.
(and (not (parser-buffer-at-end? buffer))
- (let ((min-end (+ (parser-buffer-index buffer) n))
+ (let ((min-end (fix:+ (parser-buffer-index buffer) n))
(end (parser-buffer-end buffer)))
- ;; (assert (> min-end end))
+ ;; (assert (fix:> min-end end))
(let ((string (parser-buffer-string buffer)))
- (if (> min-end (wide-string-length string))
+ (if (fix:> min-end (ustring-length string))
(set-parser-buffer-string! buffer
(%grow-buffer string end min-end))))
(let ((port (parser-buffer-port buffer))
(with-input-port-blocking-mode port 'BLOCKING
(lambda ()
(let loop ((end end))
- (if (< end min-end)
+ (if (fix:< end min-end)
(let ((n-read
(input-port/read-substring! port
string end min-end)))
- (if (> n-read 0)
- (let ((end (+ end n-read)))
+ (if (fix:> n-read 0)
+ (let ((end (fix:+ end n-read)))
(set-parser-buffer-end! buffer end)
(loop end))
(begin
(define (%grow-buffer string end min-length)
(let ((new-string
- (make-wide-string
- (let loop ((n (wide-string-length string)))
- (if (<= min-length n)
+ (make-ustring
+ (let loop ((n (ustring-length string)))
+ (if (fix:<= min-length n)
n
- (loop (* n 2)))))))
- (do ((i 0 (+ i 1)))
- ((not (< i end)))
- (wide-string-set! new-string i (wide-string-ref string i)))
+ (loop (fix:* n 2)))))))
+ (ustring-copy! new-string 0 string 0 end)
new-string))
\ No newline at end of file
(cond ((not char) #f)
((eof-object? char) 0)
(else
- (xstring-set! string start char)
+ (ustring-set! string start char)
(let loop ((index (+ start 1)))
(if (and (< index end)
(char-ready? port))
(if (or (not char) (eof-object? char))
(- index start)
(begin
- (xstring-set! string index char)
+ (ustring-set! string index char)
(loop (+ index 1)))))
(- index start))))))))
(let ((write-char (textual-port-operation/write-char port)))
(let loop ((i start))
(if (< i end)
- (let ((n (write-char port (xstring-ref string i))))
+ (let ((n (write-char port (ustring-ref string i))))
(cond ((not n) (and (> i start) (- i start)))
((> n 0) (loop (+ i 1)))
(else (- i start))))
(if (and n (> n 0))
(let ((end (+ start n)))
(set-textual-port-previous! port
- (xstring-ref string (- end 1)))
+ (ustring-ref string (- end 1)))
(transcribe-substring string start end port)))
n))))
(flush-output
(define (with-highlight-strings-printed pph thunk)
(let ((print-string
(lambda (s)
- (if (string? s)
+ (if (ustring? s)
(*unparse-string s)
(s (output-port))))))
(print-string (pph/start-string pph))
(define (pph/start-string-length pph)
(let ((start (pph/start-string pph)))
- (if (string? start)
- (string-length start)
+ (if (ustring? start)
+ (ustring-length start)
0)))
(define (pph/end-string-length pph)
(let ((end (pph/end-string pph)))
- (if (string? end)
- (string-length end)
+ (if (ustring? end)
+ (ustring-length end)
0)))
(define (pp-top-level expression port as-code? indentation list-depth)
numerical-walk))
(node (numerical-walk expression list-depth)))
(if (positive? indentation)
- (*unparse-string (make-string indentation #\space)))
+ (*unparse-string (make-ustring indentation #\space)))
(if as-code?
(print-node node indentation list-depth)
(print-non-code-node node indentation list-depth))
((prefix-node? node)
(*unparse-string (prefix-node-prefix node))
(let ((new-column
- (+ column (string-length (prefix-node-prefix node))))
+ (+ column (ustring-length (prefix-node-prefix node))))
(subnode (prefix-node-subnode node)))
(if (null? (dispatch-list))
(print-node subnode new-column depth)
(pad-with-spaces column))
(define-integrable (pad-with-spaces n-spaces)
- (*unparse-string (make-string n-spaces #\space)))
+ (*unparse-string (make-ustring n-spaces #\space)))
\f
;;;; Numerical Walk
(update-queue (cdr half-pointer/queue) '(CDR)))))
(if (eq? (car half-pointer/queue) (cdr pair))
(make-singleton-list-node
- (string-append
+ (ustring-append
". "
(circularity-string (cdr half-pointer/queue))))
(loop (cdr pair) list-breadth half-pointer/queue)))
(define (circularity-string queue)
(let ((depth (queue-depth queue))
(cdrs (queue/past-cdrs queue)))
- (string-append
+ (ustring-append
(cond ((= depth 1) "#[circularity (current parenthetical level")
((= depth 2) "#[circularity (up 1 parenthetical level")
(else
- (string-append "#[circularity (up "
- (number->string (-1+ depth))
- " parenthetical levels")))
+ (ustring-append "#[circularity (up "
+ (number->string (-1+ depth))
+ " parenthetical levels")))
(cond ((= cdrs 0) ")]")
((= cdrs 1) ", downstream 1 cdr.)]")
(else
- (string-append ", downstream "
- (number->string cdrs) " cdrs.)]"))))))
+ (ustring-append ", downstream "
+ (number->string cdrs)
+ " cdrs.)]"))))))
\f
;;;; Node Model
;;; be gained by keeping it around.
(define (symbol-length symbol)
- (string-length
+ (ustring-length
(call-with-output-string
(lambda (port)
(write symbol port)))))
(subnode #f read-only #t))
(define (make-prefix-node prefix subnode)
- (cond ((string? subnode)
- (string-append prefix subnode))
+ (cond ((ustring? subnode)
+ (ustring-append prefix subnode))
((prefix-node? subnode)
- (make-prefix-node (string-append prefix (prefix-node-prefix subnode))
+ (make-prefix-node (ustring-append prefix (prefix-node-prefix subnode))
(prefix-node-subnode subnode)))
(else
- (%make-prefix-node (+ (string-length prefix) (node-size subnode))
+ (%make-prefix-node (+ (ustring-length prefix) (node-size subnode))
prefix
subnode))))
((prefix-node? node) (prefix-node-size node))
((highlighted-node? node)
(highlighted-node/size node))
- (else (string-length node))))
+ (else (ustring-length node))))
(define-structure (highlighted-node
(conc-name highlighted-node/)
(vector-8b-maximum-length string-maximum-length)
(vector-8b? string?)
error:not-string
- error:not-xstring
guarantee-string
guarantee-string-index
- guarantee-xstring
hexadecimal->vector-8b
make-vector-8b
vector-8b->hexadecimal
string-hash-mod
string-head
string-head!
+ string-joiner
+ string-joiner*
string-length
string-lower-case?
string-map
string-search-backward
string-search-forward
string-set!
+ string-splitter
string-suffix-ci?
string-suffix?
string-tail
substring-upper-case?
substring<?
substring=?
- substring?
- utf8-string
- xstring-fill!
- xstring-length
- xstring-move!
- xstring-ref
- xstring-set!
- xstring?
- xsubstring
- xsubstring-fill!
- xsubstring-find-next-char
- xsubstring-find-next-char-ci
- xsubstring-find-next-char-in-set
- xsubstring-find-previous-char
- xsubstring-find-previous-char-ci
- xsubstring-find-previous-char-in-set
- xsubstring-move!)
- (export (runtime generic-i/o-port)
- %substring-move!)
+ substring?)
(initialization (initialize-package!)))
(define-package (runtime ustring)
(files "ustring")
(parent (runtime))
(export ()
- (make-ustring make-utf32-string)
(usubstring ustring-copy)
list->ustring
+ make-ustring
string-for-primitive ;export to (runtime) after 9.3
ustring
ustring*
ustring->ascii
ustring->list
- ustring->utf8-string ;temporary scaffolding
ustring->vector
ustring-any
ustring-append
+ ustring-append*
ustring-ascii?
ustring-capitalize
ustring-ci<=?
ustring>=?
ustring>?
ustring?
- utf8-string->ustring ;temporary scaffolding
;; vector->ustring
)
(export (runtime predicate-metadata)
unicode-char->scalar-value
unicode-char?
unicode-scalar-value?)
- (export (runtime unicode)
- legal-code-16?
- legal-code-32?)
(initialization (initialize-package!)))
(define-package (runtime character-set)
(parent (runtime))
(export ()
;; BEGIN deprecated bindings
+ (8-bit-alphabet? 8-bit-char-set?)
+ (alphabet char-set)
+ (alphabet+ char-set-union)
+ (alphabet- char-set-difference)
+ (alphabet->string char-set->string)
+ (alphabet-predicate char-set-predicate)
+ (alphabet? char-set?)
(error:not-8-bit-alphabet error:not-8-bit-char-set)
(error:not-alphabet error:not-char-set)
(guarantee-8-bit-alphabet guarantee-8-bit-char-set)
(guarantee-alphabet guarantee-char-set)
+ (string->alphabet string->char-set)
+ alphabet->char-set
+ alphabet->scalar-values
+ char-in-alphabet?
+ char-set->alphabet
error:not-8-bit-char-set
error:not-char-set
error:not-well-formed-scalar-value-list
guarantee-char-set
guarantee-well-formed-scalar-value-list
guarantee-well-formed-scalar-value-range
+ scalar-values->alphabet
;; END deprecated bindings
- (8-bit-alphabet? 8-bit-char-set?)
- (<alphabet> <char-set>)
- (alphabet char-set)
- (alphabet+ char-set-union)
- (alphabet- char-set-difference)
- (alphabet->string char-set->string)
- (alphabet-predicate char-set-predicate)
- (alphabet? char-set?)
- (string->alphabet string->char-set)
8-bit-char-set?
- <char-set>
- alphabet->char-set
- alphabet->scalar-values
ascii-range->char-set
char-alphabetic?
char-alphanumeric?
char-ctl?
char-graphic?
- char-in-alphabet?
char-lower-case?
char-numeric?
char-set
- char-set->alphabet
char-set->scalar-values
char-set-difference
char-set-intersection
char-wsp?
chars->char-set
scalar-value-in-char-set?
- scalar-values->alphabet
scalar-values->char-set
string->char-set
well-formed-scalar-value-list?
get-output-string
get-output-string!
open-input-string
- open-output-string)
- (initialization (initialize-package!)))
+ open-output-string))
(define-package (runtime syntax)
(files)
(files "parser-buffer")
(parent (runtime))
(export ()
- ;; Deprecated:
+ ;; BEGIN deprecated bindings
(input-port->parser-buffer textual-input-port->parser-buffer)
(match-parser-buffer-char-in-alphabet match-parser-buffer-char-in-set)
(match-parser-buffer-char-in-alphabet-no-advance
(match-parser-buffer-char-not-in-alphabet-no-advance
match-parser-buffer-char-not-in-set-no-advance)
(match-utf8-char-in-alphabet match-parser-buffer-char-in-set)
+ ;; END deprecated bindings
*match-string
*match-symbol
- *match-utf8-string
*parse-string
*parse-symbol
- *parse-utf8-string
call-with-parser-buffer-tail
complete-*match
complete-*parse
read-parser-buffer-char
set-parser-buffer-pointer!
string->parser-buffer
- textual-input-port->parser-buffer
- utf8-string->parser-buffer))
-
-(define-package (runtime unicode)
- (files "unicode")
- (parent (runtime))
- (export ()
- call-with-utf8-input-string
- call-with-utf8-output-string
- for-all-chars-in-string?
- make-wide-string
- open-utf8-input-string
- open-utf8-output-string
- string->utf8-string
- string->wide-string
- utf8-string->string
- utf8-string->wide-string
- utf8-string-length
- utf8-string-valid?
- utf8-string?
- wide-string
- wide-string->string
- wide-string-index?
- wide-string-length
- wide-string-ref
- wide-string-set!
- wide-string?
- wide-substring))
+ textual-input-port->parser-buffer))
(define-package (runtime uri)
(files "url")
(define (string . objects)
(%string-append (map ->string objects)))
-(define (utf8-string . objects)
- (%string-append (map ->utf8-string objects)))
-
(define (->string object)
(cond ((string? object) object)
((symbol? object) (symbol->string object))
- ((wide-string? object) (wide-string->string object))
((8-bit-char? object) (make-string 1 object))
(else (%->string object 'STRING))))
-(define (->utf8-string object)
- (cond ((string? object) (string->utf8-string object))
- ((symbol? object) (symbol-name object))
- ((wide-string? object) (string->utf8-string object))
- ((unicode-char? object)
- (string->utf8-string (wide-string object)))
- (else (%->string object 'UTF8-STRING))))
-
(define (%->string object caller)
(cond ((not object) "")
((number? object) (number->string object))
(VECTOR-8B-SET! STRING2 START2 CODE)
,(let loop ((i 1))
(if (< i n)
- `(LET ((CODE (VECTOR-8B-REF STRING1 (FIX:+ START1 ,i))))
+ `(LET ((CODE
+ (VECTOR-8B-REF STRING1
+ (FIX:+ START1 ,i))))
(AND (FIX:< CODE #x80)
(BEGIN
- (VECTOR-8B-SET! STRING2 (FIX:+ START2 ,i) CODE)
+ (VECTOR-8B-SET! STRING2
+ (FIX:+ START2 ,i)
+ CODE)
,(loop (+ i 1)))))
'STRING2)))))))))
(unrolled-move-right
(VECTOR-8B-SET! STRING2 (FIX:+ START2 ,(- n 1)) CODE)
,(let loop ((i (- n 1)))
(if (> i 0)
- `(LET ((CODE (VECTOR-8B-REF STRING1 (FIX:+ START1 ,(- i 1)))))
+ `(LET ((CODE
+ (VECTOR-8B-REF STRING1
+ (FIX:+ START1 ,(- i 1)))))
(AND (FIX:< CODE #x80)
(BEGIN
- (VECTOR-8B-SET! STRING2 (FIX:+ START2 ,(- i 1)) CODE)
+ (VECTOR-8B-SET! STRING2
+ (FIX:+ START2 ,(- i 1))
+ CODE)
,(loop (- i 1)))))
'STRING2))))))))))
(let ((n (fix:- end1 start1)))
(loop (cdr strings) (fix:+ index size)))
result))))
-(define (decorated-string-append prefix infix suffix strings)
- (guarantee-string prefix 'DECORATED-STRING-APPEND)
- (guarantee-string infix 'DECORATED-STRING-APPEND)
- (guarantee-string suffix 'DECORATED-STRING-APPEND)
- (%decorated-string-append prefix infix suffix strings
- 'DECORATED-STRING-APPEND))
-
-(define (%decorated-string-append prefix infix suffix strings procedure)
- (if (pair? strings)
- (let ((np (string-length prefix))
- (ni (string-length infix))
- (ns (string-length suffix)))
- (guarantee-string (car strings) procedure)
- (let ((string
- (make-string
- (let ((ni* (fix:+ np (fix:+ ni ns))))
- (do ((strings (cdr strings) (cdr strings))
- (count (fix:+ np (string-length (car strings)))
- (fix:+ count
- (fix:+ ni*
- (string-length (car strings))))))
- ((not (pair? strings))
- (fix:+ count ns))
- (guarantee-string (car strings) procedure))))))
- (let ((mp
- (lambda (index)
- (%substring-move! prefix 0 np string index)))
- (mi
- (lambda (index)
- (%substring-move! infix 0 ni string index)))
- (ms
- (lambda (index)
- (%substring-move! suffix 0 ns string index)))
- (mv
- (lambda (s index)
- (%substring-move! s 0 (string-length s) string index))))
- (let loop
- ((strings (cdr strings))
- (index (mv (car strings) (mp 0))))
- (if (pair? strings)
- (loop (cdr strings)
- (mv (car strings) (mp (mi (ms index)))))
- (ms index))))
- string))
- (make-string 0)))
-\f
-(define (burst-string string delimiter allow-runs?)
- (guarantee-string string 'BURST-STRING)
- (let ((end (string-length string)))
- (cond ((char? delimiter)
- (let loop ((start 0) (index 0) (result '()))
- (cond ((fix:= index end)
- (reverse!
- (if (and allow-runs? (fix:= start index))
- result
- (cons (%substring string start index) result))))
- ((char=? delimiter (string-ref string index))
- (loop (fix:+ index 1)
- (fix:+ index 1)
- (if (and allow-runs? (fix:= start index))
- result
- (cons (%substring string start index) result))))
- (else
- (loop start (fix:+ index 1) result)))))
- ((char-set? delimiter)
- (let loop ((start 0) (index 0) (result '()))
- (cond ((fix:= index end)
- (reverse!
- (if (and allow-runs? (fix:= start index))
- result
- (cons (%substring string start index) result))))
- ((char-set-member? delimiter (string-ref string index))
- (loop (fix:+ index 1)
- (fix:+ index 1)
- (if (and allow-runs? (fix:= start index))
- result
- (cons (%substring string start index) result))))
- (else
- (loop start (fix:+ index 1) result)))))
- (else
- (error:wrong-type-argument delimiter "character or character set"
- 'BURST-STRING)))))
-
(define (reverse-string string)
(guarantee-string string 'REVERSE-STRING)
(%reverse-substring string 0 (string-length string)))
(string-set! string j (string-ref string i))
(string-set! string i char)))))
\f
+(define (decorated-string-append prefix infix suffix strings)
+ ((string-joiner* prefix infix suffix) strings))
+
+(define (string-joiner infix #!optional prefix suffix)
+ (let ((joiner (string-joiner* prefix infix suffix)))
+ (lambda strings
+ (joiner strings))))
+
+(define (string-joiner* infix #!optional prefix suffix)
+ (let ((prefix (if (default-object? prefix) "" prefix))
+ (suffix (if (default-object? suffix) "" suffix)))
+ (let ((infix (ustring-append suffix infix prefix)))
+
+ (lambda (strings)
+ (ustring-append*
+ (if (pair? strings)
+ (cons* prefix
+ (car strings)
+ (let loop ((strings (cdr strings)))
+ (if (pair? strings)
+ (cons* infix
+ (car strings)
+ (loop (cdr strings)))
+ (list suffix))))
+ '()))))))
+
+(define (burst-string string delimiter allow-runs?)
+ ((string-splitter delimiter allow-runs?) string))
+
+(define (string-splitter delimiter #!optional allow-runs?)
+ (let ((predicate (splitter-delimiter->predicate delimiter))
+ (allow-runs? (if (default-object? allow-runs?) #t allow-runs?)))
+
+ (lambda (string #!optional start end)
+ (let* ((end (fix:end-index end (ustring-length string) 'string-splitter))
+ (start (fix:start-index start end 'string-splitter)))
+
+ (define (find-start start)
+ (if allow-runs?
+ (let loop ((index start))
+ (if (fix:< index end)
+ (if (predicate (ustring-ref string index))
+ (loop (fix:+ index 1))
+ (find-end index (fix:+ index 1)))
+ '()))
+ (find-end start start)))
+
+ (define (find-end start index)
+ (let loop ((index index))
+ (if (fix:< index end)
+ (if (predicate (ustring-ref string index))
+ (cons (ustring-copy string start index)
+ (find-start (fix:+ index 1)))
+ (loop (fix:+ index 1)))
+ (list (ustring-copy string start end)))))
+
+ (find-start start)))))
+
+(define (splitter-delimiter->predicate delimiter)
+ (cond ((char? delimiter) (char=-predicate delimiter))
+ ((char-set? delimiter) (char-set-predicate delimiter))
+ ((unary-procedure? delimiter) delimiter)
+ (else (error:not-a splitter-delimiter? delimiter 'string-splitter))))
+
+(define (splitter-delimiter? object)
+ (or (char? object)
+ (char-set? object)
+ (unary-procedure? object)))
+\f
(define (vector-8b->hexadecimal bytes)
(define-integrable (hex-char k)
(string-ref "0123456789abcdef" (fix:and k #x0F)))
(define (lisp-string->camel-case string #!optional upcase-initial?)
(call-with-input-string string
(lambda (input)
- (call-narrow-output-string
+ (call-with-output-string
(lambda (output)
(let loop
((upcase?
(outer k (fix:+ q 1)))))
pi))
\f
-(define (xstring? object)
- (or (string? object)
- (wide-string? object)))
-
-(define (xstring-length string)
- (cond ((string? string) (string-length string))
- ((wide-string? string) (wide-string-length string))
- (else (error:not-xstring string 'XSTRING-LENGTH))))
-
-(define (xstring-ref string index)
- (cond ((string? string) (string-ref string index))
- ((wide-string? string) (wide-string-ref string index))
- (else (error:not-xstring string 'XSTRING-REF))))
-
-(define (xstring-set! string index char)
- (cond ((string? string) (string-set! string index char))
- ((wide-string? string) (wide-string-set! string index char))
- (else (error:not-xstring string 'XSTRING-SET!))))
-
-(define (xstring-move! xstring1 xstring2 start2)
- (xsubstring-move! xstring1 0 (xstring-length xstring1) xstring2 start2))
-
-(define (xsubstring-move! xstring1 start1 end1 xstring2 start2)
- (cond ((or (not (eq? xstring2 xstring1)) (< start2 start1))
- (substring-move-left! xstring1 start1 end1
- xstring2 start2))
- ((> start2 start1)
- (substring-move-right! xstring1 start1 end1
- xstring2 start2))))
-
-(define (xsubstring xstring start end)
- (guarantee-xsubstring xstring start end 'XSUBSTRING)
- (let ((string (make-string (- end start))))
- (xsubstring-move! xstring start end string 0)
- string))
-\f
-(define (xstring-fill! xstring char)
- (cond ((string? xstring)
- (string-fill! xstring char))
- (else
- (error:not-xstring xstring 'XSTRING-FILL!))))
-
-(define (xsubstring-fill! xstring start end char)
- (cond ((string? xstring)
- (substring-fill! xstring start end char))
- (else
- (error:not-xstring xstring 'XSTRING-FILL!))))
-
-(define-integrable (xsubstring-find-char xstring start end datum finder caller)
- (cond ((string? xstring)
- (guarantee-substring xstring start end caller)
- (finder xstring start end datum))
- (else
- (error:not-xstring xstring caller))))
-
-(define (xsubstring-find-next-char xstring start end char)
- (guarantee-char char 'XSUBSTRING-FIND-NEXT-CHAR)
- (xsubstring-find-char xstring start end (char->ascii char)
- (ucode-primitive VECTOR-8B-FIND-NEXT-CHAR)
- 'XSUBSTRING-FIND-NEXT-CHAR))
-
-(define (xsubstring-find-next-char-ci xstring start end char)
- (guarantee-char char 'XSUBSTRING-FIND-NEXT-CHAR-CI)
- (xsubstring-find-char xstring start end (char->ascii char)
- (ucode-primitive VECTOR-8B-FIND-NEXT-CHAR-CI)
- 'XSUBSTRING-FIND-NEXT-CHAR-CI))
-
-(define (xsubstring-find-next-char-in-set xstring start end char-set)
- (guarantee-char-set char-set 'XSUBSTRING-FIND-NEXT-CHAR-IN-SET)
- (xsubstring-find-char xstring start end (char-set-table char-set)
- (ucode-primitive SUBSTRING-FIND-NEXT-CHAR-IN-SET)
- 'XSUBSTRING-FIND-NEXT-CHAR-IN-SET))
-
-(define (xsubstring-find-previous-char xstring start end char)
- (guarantee-char char 'XSUBSTRING-FIND-PREVIOUS-CHAR)
- (xsubstring-find-char xstring start end (char->ascii char)
- (ucode-primitive VECTOR-8B-FIND-PREVIOUS-CHAR)
- 'XSUBSTRING-FIND-PREVIOUS-CHAR))
-
-(define (xsubstring-find-previous-char-ci xstring start end char)
- (guarantee-char char 'XSUBSTRING-FIND-PREVIOUS-CHAR-CI)
- (xsubstring-find-char xstring start end (char->ascii char)
- (ucode-primitive VECTOR-8B-FIND-PREVIOUS-CHAR-CI)
- 'XSUBSTRING-FIND-PREVIOUS-CHAR-CI))
-
-(define (xsubstring-find-previous-char-in-set xstring start end char-set)
- (guarantee-char-set char-set 'XSUBSTRING-FIND-PREVIOUS-CHAR-IN-SET)
- (xsubstring-find-char xstring start end (char-set-table char-set)
- (ucode-primitive SUBSTRING-FIND-PREVIOUS-CHAR-IN-SET)
- 'XSUBSTRING-FIND-PREVIOUS-CHAR-IN-SET))
-\f
;;;; Guarantors
;;
;; The guarantors are integrated. Most are structured as combination of
;; meaningful message. Structuring the code this way significantly
;; reduces code bloat from large integrated procedures.
-(declare (integrate-operator guarantee-string guarantee-xstring))
+(declare (integrate-operator guarantee-string))
(define-guarantee string "string")
-(define-guarantee xstring "xstring")
(define-integrable (guarantee-2-strings object1 object2 procedure)
(if (not (and (string? object1) (string? object2)))
(if (not (index-fixnum? object))
(error:wrong-type-argument object "string index" caller)))
-(define-integrable (guarantee-xstring-index object caller)
- (if (not (exact-nonnegative-integer? object))
- (error:wrong-type-argument object "xstring index" caller)))
-
(define-integrable (guarantee-substring string start end caller)
(if (not (and (string? string)
(index-fixnum? start)
(guarantee-string string caller)
(guarantee-substring-end-index end (string-length string) caller)
(guarantee-substring-start-index start end caller))
-\f
-(define-integrable (guarantee-xsubstring xstring start end caller)
- (if (not (and (xstring? xstring)
- (exact-nonnegative-integer? start)
- (exact-nonnegative-integer? end)
- (<= start end)
- (<= end (xstring-length xstring))))
- (guarantee-xsubstring/fail xstring start end caller)))
-
-(define (guarantee-xsubstring/fail xstring start end caller)
- (guarantee-xstring xstring caller)
- (guarantee-xsubstring-end-index end (xstring-length xstring) caller)
- (guarantee-xsubstring-start-index start end caller))
(define-integrable (guarantee-substring-end-index end length caller)
(guarantee-string-index end caller)
(error:bad-range-argument start caller))
start)
-(define-integrable (guarantee-xsubstring-end-index end length caller)
- (guarantee-xstring-index end caller)
- (if (not (<= end length))
- (error:bad-range-argument end caller))
- end)
-
-(define-integrable (guarantee-xsubstring-start-index start end caller)
- (guarantee-xstring-index start caller)
- (if (not (<= start end))
- (error:bad-range-argument start caller))
- start)
-
(define-integrable (guarantee-2-substrings string1 start1 end1
string2 start2 end2
procedure)
(j start* (fix:+ j 1)))
((not (fix:< i limit))
(set! index i))
- (bytevector-u8-set! bv j
- (char->ascii (ustring-ref string i)))))
+ (bytevector-u8-set! bv j (char->ascii (ustring-ref string i)))))
n)))))
(define (make-octets-input-type)
(define (get-object-type-name obj)
(cond ((boolean? obj) "boolean")
- ((string? obj) "string")
+ ((ustring? obj) "string")
((char? obj) "char")
((fixnum? obj) "fixnum")
((integer? obj) "integer")
((symbol? obj) "symbol")
((weak-pair? obj) "weak-pair")
((record-type? obj) "record-type")
- ((wide-string? obj) "wide-string")
(else (user-object-type obj))))
\f
;;;; Miscellaneous
(if nonblock?
(set-output-port-blocking-mode! port 'nonblocking))
(receiver
- (let ((buffer (make-wide-string bsize)))
+ (let ((buffer (make-ustring bsize)))
(lambda ()
(with-input-port-blocking-mode process-input 'BLOCKING
(lambda ()
- (let ((n
- (input-port/read-string! process-input buffer)))
+ (let ((n (input-port/read-string! process-input buffer)))
(if n
(if (fix:> n 0)
(output-port/write-substring port buffer 0 n)
(let ((input-port/open? (port/operation port 'INPUT-OPEN?))
(input-port/close (port/operation port 'CLOSE-INPUT)))
(if process-output
- (let ((buffer (make-wide-string bsize)))
+ (let ((buffer (make-ustring bsize)))
(let ((copy-output
(lambda ()
(let ((n (input-port/read-string! port buffer)))
+++ /dev/null
-#| -*-Scheme-*-
-
-Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
- 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
- 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
- 2017 Massachusetts Institute of Technology
-
-This file is part of MIT/GNU Scheme.
-
-MIT/GNU Scheme is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or (at
-your option) any later version.
-
-MIT/GNU Scheme is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with MIT/GNU Scheme; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
-USA.
-
-|#
-
-;;;; Unicode support
-;;; package: (runtime unicode)
-
-;;; See "http://www.cl.cam.ac.uk/~mgk25/unicode.html".
-;;;
-;;; UTF-8 encoding
-;;; ==============
-;;;
-;;; max code encoding
-;;; ---------- -----------------------------------------------------
-;;; #x00000080 0xxxxxxx
-;;; #x00000800 110xxxxx 10xxxxxx
-;;; #x00010000 1110xxxx 10xxxxxx 10xxxxxx
-;;; #x00200000 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-;;; #x04000000 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-;;; #x80000000 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-;;;
-;;; It is possible to represent codes with over-long sequences, but
-;;; this is disallowed. For example, #\A is normally represented as
-;;; #x41, but could also be written as #xC1 #x81, or even longer
-;;; sequences.
-;;;
-;;; UTF-16 encoding
-;;; ===============
-;;;
-;;; Codes in the ranges #x0000 through #xD7FF and #xE000 through
-;;; #xFFFD are represented as themselves. Codes in the range #x10000
-;;; through #xFFFFF are represented as a pair:
-;;;
-;;; 110110xxxxxxxxxx 110111xxxxxxxxxx
-;;;
-;;; where the first 16-bit word contains the MS 10 bits, and the
-;;; second contains the LS 10 bits. As for UTF-8, overlong sequences
-;;; are disallowed.
-;;;
-;;; Some UTF-16 documents start with the code #xFEFF, to identify the
-;;; endianness of the document. If instead #xFFFE is encountered, the
-;;; opposite endianness should be used.
-
-(declare (usual-integrations))
-\f
-(define-syntax with-substring-args
- (sc-macro-transformer
- (lambda (form environment)
- (if (syntax-match? '(expression expression expression expression
- + expression)
- (cdr form))
- (let ((string (close-syntax (list-ref form 1) environment))
- (start (close-syntax (list-ref form 2) environment))
- (end (close-syntax (list-ref form 3) environment))
- (caller (close-syntax (list-ref form 4) environment)))
- `(BEGIN
- (GUARANTEE-STRING ,string ,caller)
- (LET* ((,(list-ref form 3)
- (IF (IF (DEFAULT-OBJECT? ,end) #F ,end)
- (GUARANTEE-LIMITED-INDEX ,end (STRING-LENGTH ,string)
- ,caller)
- (STRING-LENGTH ,string)))
- (,(list-ref form 2)
- (IF (IF (DEFAULT-OBJECT? ,start) #F ,start)
- (GUARANTEE-LIMITED-INDEX ,start ,(list-ref form 3)
- ,caller)
- 0)))
- ,@(map (let ((excludes
- (list (list-ref form 2) (list-ref form 3))))
- (lambda (expr)
- (make-syntactic-closure environment excludes expr)))
- (list-tail form 5)))))
- (ill-formed-syntax form)))))
-
-(define (guarantee-limited-index index limit caller)
- (guarantee-index-fixnum index caller)
- (if (not (fix:<= index limit))
- (error:bad-range-argument index caller))
- index)
-
-(define (encoded-string-length string start end type caller validate-char)
- (let loop ((start start) (n 0))
- (if (fix:< start end)
- (let ((start* (validate-char string start end)))
- (if (not start*)
- (error:wrong-type-argument string
- (string-append "a UTF-"
- type
- " string")
- caller))
- (loop start* (fix:+ n 1)))
- n)))
-
-(define (encoded-string-valid? string start end validate-char)
- (let loop ((start start))
- (if (fix:< start end)
- (let ((start* (validate-char string start end)))
- (if start*
- (loop start*)
- #f))
- #t)))
-\f
-(define (coded-input-opener coding)
- (lambda (string #!optional start end)
- (let ((port (open-input-octets string start end)))
- (port/set-coding port coding)
- (port/set-line-ending port 'NEWLINE)
- port)))
-
-(define (coded-output-opener coding)
- (lambda ()
- (let ((port (open-output-octets)))
- (port/set-coding port coding)
- (port/set-line-ending port 'NEWLINE)
- port)))
-
-(define (ended-input-opener be le)
- (lambda (string #!optional start end)
- (if (host-big-endian?)
- (be string start end)
- (le string start end))))
-
-(define (ended-output-opener be le)
- (lambda ()
- (if (host-big-endian?)
- (be)
- (le))))
-
-(define (input-string-caller open-input)
- (lambda (string procedure)
- (let ((port (open-input string)))
- (let ((value (procedure port)))
- (close-input-port port)
- value))))
-
-(define (output-string-caller open-output)
- (lambda (procedure)
- (let ((port (open-output)))
- (procedure port)
- (get-output-string! port))))
-\f
-;;;; Unicode strings
-
-(define-structure (wide-string (type-descriptor <wide-string>)
- (constructor %make-wide-string))
- (contents #f read-only #t))
-
-(define-guarantee wide-string "a Unicode string")
-
-(define (make-wide-string length #!optional char)
- (%make-wide-string
- (make-vector length
- (if (if (default-object? char) #f char)
- (begin
- (guarantee-unicode-char char 'MAKE-WIDE-STRING)
- char)
- (integer->char 0)))))
-
-(define (wide-string . chars)
- (for-each (lambda (char) (guarantee-unicode-char char 'WIDE-STRING)) chars)
- (%make-wide-string (list->vector chars)))
-
-(define (wide-string-length string)
- (guarantee wide-string? string 'WIDE-STRING-LENGTH)
- (%wide-string-length string))
-
-(define-integrable (%wide-string-length string)
- (vector-length (wide-string-contents string)))
-
-(define (wide-string-ref string index)
- (guarantee wide-string? string 'WIDE-STRING-REF)
- (guarantee-wide-string-index index string 'WIDE-STRING-REF)
- (%wide-string-ref string index))
-
-(define-integrable (%wide-string-ref string index)
- (vector-ref (wide-string-contents string) index))
-
-(define (wide-string-set! string index char)
- (guarantee wide-string? string 'WIDE-STRING-SET!)
- (guarantee-wide-string-index index string 'WIDE-STRING-SET!)
- (guarantee-unicode-char char 'WIDE-STRING-SET!)
- (%wide-string-set! string index char))
-
-(define-integrable (%wide-string-set! string index char)
- (vector-set! (wide-string-contents string) index char))
-
-(define (wide-substring string start end)
- (guarantee-wide-substring string start end 'WIDE-SUBSTRING)
- (%wide-substring string start end))
-
-(define (%wide-substring string start end)
- (let ((string* (make-wide-string (fix:- end start))))
- (let ((v1 (wide-string-contents string))
- (v2 (wide-string-contents string*)))
- (do ((i start (fix:+ i 1))
- (j 0 (fix:+ j 1)))
- ((not (fix:< i end)))
- (vector-set! v2 j (vector-ref v1 i))))
- string*))
-\f
-(define (wide-string-index? index string)
- (and (index-fixnum? index)
- (fix:< index (%wide-string-length string))))
-
-(define-integrable (guarantee-wide-string-index index string caller)
- (if (not (wide-string-index? index string))
- (error:not-wide-string-index index caller)))
-
-(define (error:not-wide-string-index index caller)
- (error:wrong-type-argument index "a Unicode string index" caller))
-
-(define-integrable (guarantee-wide-substring string start end caller)
- (if (not (and (wide-string? string)
- (index-fixnum? start)
- (index-fixnum? end)
- (fix:<= start end)
- (fix:<= end (%wide-string-length string))))
- (guarantee-wide-substring/fail string start end caller)))
-
-(define (guarantee-wide-substring/fail string start end caller)
- (guarantee wide-string? string caller)
- (guarantee-limited-index end (%wide-string-length string) caller)
- (guarantee-limited-index start end caller))
-
-(define (string->wide-string string #!optional start end)
- (%convert-string string start end
- open-input-string
- open-wide-output-string))
-
-(define (wide-string->string string #!optional start end)
- (%convert-string string start end
- open-input-string
- open-narrow-output-string))
-
-(define (%convert-string string start end open-input open-output)
- (let ((input (open-input string start end))
- (output (open-output)))
- (let loop ()
- (let ((c (read-char input)))
- (if (not (eof-object? c))
- (begin
- (write-char c output)
- (loop)))))
- (get-output-string! output)))
-\f
-;;;; UTF-32 representation
-
-(define open-utf32-be-input-string
- (coded-input-opener 'UTF-32BE))
-
-(define open-utf32-le-input-string
- (coded-input-opener 'UTF-32LE))
-
-(define open-utf32-input-string
- (ended-input-opener open-utf32-be-input-string
- open-utf32-le-input-string))
-
-(define call-with-utf32-be-input-string
- (input-string-caller open-utf32-be-input-string))
-
-(define call-with-utf32-le-input-string
- (input-string-caller open-utf32-le-input-string))
-
-(define call-with-utf32-input-string
- (input-string-caller open-utf32-input-string))
-
-(define open-utf32-be-output-string
- (coded-output-opener 'UTF-32BE))
-
-(define open-utf32-le-output-string
- (coded-output-opener 'UTF-32LE))
-
-(define open-utf32-output-string
- (ended-output-opener open-utf32-be-output-string
- open-utf32-le-output-string))
-
-(define call-with-utf32-be-output-string
- (output-string-caller open-utf32-be-output-string))
-
-(define call-with-utf32-le-output-string
- (output-string-caller open-utf32-le-output-string))
-
-(define call-with-utf32-output-string
- (output-string-caller open-utf32-output-string))
-
-(define (utf32-string->wide-string string #!optional start end)
- (if (host-big-endian?)
- (utf32-be-string->wide-string string start end)
- (utf32-le-string->wide-string string start end)))
-
-(define (utf32-be-string->wide-string string #!optional start end)
- (%convert-string string start end
- open-utf32-be-input-string
- open-wide-output-string))
-
-(define (utf32-le-string->wide-string string #!optional start end)
- (%convert-string string start end
- open-utf32-le-input-string
- open-wide-output-string))
-
-(define (string->utf32-string string #!optional start end)
- (if (host-big-endian?)
- (string->utf32-be-string string start end)
- (string->utf32-le-string string start end)))
-
-(define (string->utf32-be-string string #!optional start end)
- (%convert-string string start end
- open-input-string
- open-utf32-be-output-string))
-
-(define (string->utf32-le-string string #!optional start end)
- (%convert-string string start end
- open-input-string
- open-utf32-le-output-string))
-\f
-(define (utf32-string-length string #!optional start end)
- (if (host-big-endian?)
- (utf32-be-string-length string start end)
- (utf32-le-string-length string start end)))
-
-(define (utf32-be-string-length string #!optional start end)
- (%utf32-string-length string start end "32BE" utf32-be-octets->code-point
- 'UTF32-BE-STRING-LENGTH))
-
-(define (utf32-le-string-length string #!optional start end)
- (%utf32-string-length string start end "32LE" utf32-le-octets->code-point
- 'UTF32-LE-STRING-LENGTH))
-
-(define (%utf32-string-length string start end type combiner caller)
- (with-substring-args string start end caller
- (encoded-string-length string start end type caller
- (lambda (string start end)
- (validate-utf32-char string start end combiner)))))
-
-(define (utf32-string-valid? string #!optional start end)
- (if (host-big-endian?)
- (utf32-be-string-valid? string start end)
- (utf32-le-string-valid? string start end)))
-
-(define (utf32-be-string-valid? string #!optional start end)
- (%utf32-string-valid? string start end utf32-be-octets->code-point
- 'UTF32-BE-STRING-VALID?))
-
-(define (utf32-le-string-valid? string #!optional start end)
- (%utf32-string-valid? string start end utf32-le-octets->code-point
- 'UTF32-LE-STRING-VALID?))
-
-(define (%utf32-string-valid? string start end combiner caller)
- (with-substring-args string start end caller
- (encoded-string-valid? string start end
- (lambda (string start end)
- (validate-utf32-char string start end combiner)))))
-
-(define-integrable (utf32-be-octets->code-point b0 b1 b2 b3)
- (+ (* b0 #x01000000)
- (fix:lsh b1 16)
- (fix:lsh b2 8)
- b3))
-
-(define-integrable (utf32-le-octets->code-point b0 b1 b2 b3)
- (+ (* b3 #x01000000)
- (fix:lsh b2 16)
- (fix:lsh b1 8)
- b0))
-
-(define (validate-utf32-char string start end combiner)
-
- (define-integrable (n i)
- (vector-8b-ref string (fix:+ start i)))
-
- (if (fix:< start end)
- (and (fix:<= (fix:+ start 4) end)
- (legal-code-32? (combiner (n 0) (n 1) (n 2) (n 3)))
- (fix:+ start 4))
- start))
-
-(define (utf32-string? object)
- (and (string? object)
- (utf32-string-valid? object)))
-
-(define (utf32-be-string? object)
- (and (string? object)
- (utf32-be-string-valid? object)))
-
-(define (utf32-le-string? object)
- (and (string? object)
- (utf32-le-string-valid? object)))
-
-(define-guarantee utf32-string "UTF-32 string")
-(define-guarantee utf32-be-string "UTF-32BE string")
-(define-guarantee utf32-le-string "UTF-32LE string")
-\f
-;;;; UTF-16 representation
-
-(define open-utf16-be-input-string
- (coded-input-opener 'UTF-16BE))
-
-(define open-utf16-le-input-string
- (coded-input-opener 'UTF-16LE))
-
-(define open-utf16-input-string
- (ended-input-opener open-utf16-be-input-string
- open-utf16-le-input-string))
-
-(define call-with-utf16-be-input-string
- (input-string-caller open-utf16-be-input-string))
-
-(define call-with-utf16-le-input-string
- (input-string-caller open-utf16-le-input-string))
-
-(define call-with-utf16-input-string
- (input-string-caller open-utf16-input-string))
-
-(define open-utf16-be-output-string
- (coded-output-opener 'UTF-16BE))
-
-(define open-utf16-le-output-string
- (coded-output-opener 'UTF-16LE))
-
-(define open-utf16-output-string
- (ended-output-opener open-utf16-be-output-string
- open-utf16-le-output-string))
-
-(define call-with-utf16-be-output-string
- (output-string-caller open-utf16-be-output-string))
-
-(define call-with-utf16-le-output-string
- (output-string-caller open-utf16-le-output-string))
-
-(define call-with-utf16-output-string
- (output-string-caller open-utf16-output-string))
-
-(define (utf16-string->wide-string string #!optional start end)
- (if (host-big-endian?)
- (utf16-be-string->wide-string string start end)
- (utf16-le-string->wide-string string start end)))
-
-(define (utf16-be-string->wide-string string #!optional start end)
- (%convert-string string start end
- open-utf16-be-input-string
- open-wide-output-string))
-
-(define (utf16-le-string->wide-string string #!optional start end)
- (%convert-string string start end
- open-utf16-le-input-string
- open-wide-output-string))
-
-(define (string->utf16-string string #!optional start end)
- (if (host-big-endian?)
- (string->utf16-be-string string start end)
- (string->utf16-le-string string start end)))
-
-(define (string->utf16-be-string string #!optional start end)
- (%convert-string string start end
- open-input-string
- open-utf16-be-output-string))
-
-(define (string->utf16-le-string string #!optional start end)
- (%convert-string string start end
- open-input-string
- open-utf16-le-output-string))
-\f
-(define (utf16-string-length string #!optional start end)
- (if (host-big-endian?)
- (utf16-be-string-length string start end)
- (utf16-le-string-length string start end)))
-
-(define (utf16-be-string-length string #!optional start end)
- (%utf16-string-length string start end "16BE" be-octets->digit16
- 'UTF16-BE-STRING-LENGTH))
-
-(define (utf16-le-string-length string #!optional start end)
- (%utf16-string-length string start end "16LE" le-octets->digit16
- 'UTF16-LE-STRING-LENGTH))
-
-(define (%utf16-string-length string start end type combiner caller)
- (with-substring-args string start end caller
- (encoded-string-length string start end type caller
- (lambda (string start end)
- (validate-utf16-char string start end combiner)))))
-
-(define (utf16-string-valid? string #!optional start end)
- (if (host-big-endian?)
- (utf16-be-string-valid? string start end)
- (utf16-le-string-valid? string start end)))
-
-(define (utf16-be-string-valid? string #!optional start end)
- (%utf16-string-valid? string start end be-octets->digit16
- 'UTF16-BE-STRING-VALID?))
-
-(define (utf16-le-string-valid? string #!optional start end)
- (%utf16-string-valid? string start end le-octets->digit16
- 'UTF16-LE-STRING-VALID?))
-
-(define (%utf16-string-valid? string start end combiner caller)
- (with-substring-args string start end caller
- (encoded-string-valid? string start end
- (lambda (string start end)
- (validate-utf16-char string start end combiner)))))
-\f
-(define (validate-utf16-char string start end combiner)
-
- (define-integrable (n i)
- (vector-8b-ref string (fix:+ start i)))
-
- (if (fix:< start end)
- (and (fix:<= (fix:+ start 2) end)
- (let ((d0 (combiner (n 0) (n 1))))
- (if (utf16-high-surrogate? d0)
- (and (fix:<= (fix:+ start 4) end)
- (utf16-low-surrogate? (combiner (n 2) (n 3)))
- (fix:+ start 4))
- (and (legal-code-16? d0)
- (fix:+ start 2)))))
- start))
-
-(define (be-octets->digit16 b0 b1)
- (fix:or (fix:lsh b0 8) b1))
-
-(define (le-octets->digit16 b0 b1)
- (fix:or (fix:lsh b1 8) b0))
-
-(define (combine-utf16-surrogates h l)
- (guarantee utf16-high-surrogate? h 'combine-utf16-surrogates)
- (guarantee utf16-low-surrogate? l 'combine-utf16-surrogates)
- (fix:+ (fix:+ (fix:lsh (fix:and h #x3FF) 10)
- (fix:and l #x3FF))
- #x10000))
-
-(define (split-into-utf16-surrogates n)
- (guarantee-unicode-scalar-value n 'split-into-utf16-surrogates)
- (let ((n (fix:- n #x10000)))
- (values (fix:or (fix:and (fix:lsh n -10) #x03FF) #xD800)
- (fix:or (fix:and n #x03FF) #xDC00))))
-
-(define (utf16-string? object)
- (and (string? object)
- (utf16-string-valid? object)))
-
-(define (utf16-be-string? object)
- (and (string? object)
- (utf16-be-string-valid? object)))
-
-(define (utf16-le-string? object)
- (and (string? object)
- (utf16-le-string-valid? object)))
-
-(define (utf16-high-surrogate? n)
- (and (index-fixnum? n)
- (fix:= #xD800 (fix:and #xFC00 n))))
-
-(define (utf16-low-surrogate? n)
- (and (index-fixnum? n)
- (fix:= #xDC00 (fix:and #xFC00 n))))
-
-(define-guarantee utf16-string "UTF-16 string")
-(define-guarantee utf16-be-string "UTF-16BE string")
-(define-guarantee utf16-le-string "UTF-16LE string")
-(define-guarantee utf16-high-surrogate "UTF-16 high surrogate")
-(define-guarantee utf16-low-surrogate "UTF-16 low surrogate")
-\f
-;;;; UTF-8 representation
-
-(define open-utf8-input-string
- (coded-input-opener 'UTF-8))
-
-(define call-with-utf8-input-string
- (input-string-caller open-utf8-input-string))
-
-(define open-utf8-output-string
- (coded-output-opener 'UTF-8))
-
-(define call-with-utf8-output-string
- (output-string-caller open-utf8-output-string))
-
-(define (string->utf8-string string #!optional start end)
- (%convert-string string start end
- open-input-string
- open-utf8-output-string))
-
-(define (utf8-string->string string #!optional start end)
- (%convert-string string start end
- open-utf8-input-string
- open-narrow-output-string))
-
-(define (utf8-string->wide-string string #!optional start end)
- (%convert-string string start end
- open-utf8-input-string
- open-wide-output-string))
-
-(define (utf8-string-length string #!optional start end)
- (with-substring-args string start end 'UTF8-STRING-LENGTH
- (encoded-string-length string start end "8" 'UTF8-STRING-LENGTH
- validate-utf8-char)))
-
-(define (utf8-string-valid? string #!optional start end)
- (with-substring-args string start end 'UTF8-STRING-VALID?
- (encoded-string-valid? string start end validate-utf8-char)))
-
-(define (utf8-string? object)
- (and (string? object)
- (utf8-string-valid? object)))
-
-(define-guarantee utf8-string "UTF-8 string")
-\f
-(define (validate-utf8-char string start end)
-
- (define-integrable (check-byte i)
- (%valid-trailer? (n i)))
-
- (define-integrable (n i)
- (vector-8b-ref string (fix:+ start i)))
-
- (if (fix:< start end)
- (let ((b0 (vector-8b-ref string start)))
- (cond ((fix:< b0 #x80)
- (fix:+ start 1))
- ((fix:< b0 #xE0)
- (and (fix:<= (fix:+ start 2) end)
- (check-byte 1)
- (%vs2 b0)
- (fix:+ start 2)))
- ((fix:< b0 #xF0)
- (and (fix:<= (fix:+ start 3) end)
- (check-byte 1)
- (check-byte 2)
- (%vs3 b0 (n 1))
- (legal-code-16? (%cp3 b0 (n 1) (n 2)))
- (fix:+ start 3)))
- ((fix:< b0 #xF8)
- (and (fix:<= (fix:+ start 4) end)
- (check-byte 1)
- (%vs4 b0 (n 1))
- (check-byte 2)
- (check-byte 3)
- (fix:+ start 4)))
- (else #f)))
- start))
-
-(define-integrable (%vs2 b0)
- (fix:> b0 #xC1))
-
-(define-integrable (%vs3 b0 b1)
- (or (fix:> b0 #xE0) (fix:> b1 #x9F)))
-
-(define-integrable (%vs4 b0 b1)
- (or (fix:> b0 #xF0) (fix:> b1 #x8F)))
-
-(define-integrable (%cp3 b0 b1 b2)
- (fix:or (fix:lsh (fix:and b0 #x0F) 12)
- (fix:or (fix:lsh (fix:and b1 #x3F) 6)
- (fix:and b2 #x3F))))
-
-(define-integrable (%valid-trailer? n)
- (fix:= #x80 (fix:and #xC0 n)))
-\f
-;;;; Per-character combination predicates
-
-(define (for-all-chars-in-string? predicate string #!optional start end coding)
- (let ((port (open-string string start end coding 'FOR-ALL-CHARS-IN-STRING?)))
- (let loop ()
- (let ((char (read-char port)))
- (cond ((eof-object? char) #t)
- ((predicate char) (loop))
- (else #f))))))
-
-(define (for-any-char-in-string? predicate string #!optional start end coding)
- (let ((port (open-string string start end coding 'FOR-ANY-CHAR-IN-STRING?)))
- (let loop ()
- (let ((char (read-char port)))
- (cond ((eof-object? char) #f)
- ((predicate char) #t)
- (else (loop)))))))
-
-(define (open-string string start end coding caller)
- ((cond ((default-object? coding)
- open-input-string)
- ((string? string)
- (case coding
- ((UTF-8) open-utf8-input-string)
- ((UTF-16) open-utf16-input-string)
- ((UTF-16BE) open-utf16-be-input-string)
- ((UTF-16LE) open-utf16-le-input-string)
- ((UTF-32) open-utf32-input-string)
- ((UTF-32BE) open-utf32-be-input-string)
- ((UTF-32LE) open-utf32-le-input-string)
- (else (error:bad-range-argument coding caller))))
- ((wide-string? string)
- (error:bad-range-argument coding caller))
- (else
- (error:wrong-type-argument string "string" caller)))
- string start end))
\ No newline at end of file
(define (os/parse-path-string string)
(let ((end (ustring-length string))
- (substring
+ (extract
(lambda (string start end)
(pathname-as-directory (usubstring string start end)))))
(let loop ((start 0))
(if index
(cons (if (= index start)
#f
- (usubstring string start index))
+ (extract string start index))
(loop (+ index 1)))
- (list (usubstring string start end))))
+ (list (extract string start end))))
'()))))
(define (os/shell-file-name)
(if scheme (guarantee-uri-scheme scheme 'MAKE-URI))
(if authority (guarantee-uri-authority authority 'MAKE-URI))
(guarantee-uri-path path 'MAKE-URI)
- (if query (guarantee utf8-string? query 'MAKE-URI))
- (if fragment (guarantee utf8-string? fragment 'MAKE-URI))
+ (if query (guarantee ustring? query 'MAKE-URI))
+ (if fragment (guarantee ustring? fragment 'MAKE-URI))
(if (and authority (pair? path) (path-relative? path))
(error:bad-range-argument path 'MAKE-URI))
(let* ((path (remove-dot-segments path))
;;; an empty segment.
(define (uri-path? object)
- (list-of-type? object utf8-string?))
+ (list-of-type? object ustring?))
(define (uri-path-absolute? path)
(guarantee-uri-path path 'URI-PATH-ABSOLUTE?)
(define (path-absolute? path)
(and (pair? path)
- (fix:= (string-length (car path)) 0)))
+ (fix:= 0 (ustring-length (car path)))))
(define (uri-path-relative? path)
(guarantee-uri-path path 'URI-PATH-RELATIVE?)
(define interned-uri-authorities)
\f
(define (uri-userinfo? object)
- (utf8-string? object))
+ (ustring? object))
(define (uri-host? object)
- (utf8-string? object))
+ (ustring? object))
(define (uri-port? object)
(exact-nonnegative-integer? object))
'()))))
(define (uri-prefix prefix)
- (guarantee utf8-string? prefix 'URI-PREFIX)
+ (guarantee ustring? prefix 'URI-PREFIX)
(lambda (suffix)
- (guarantee utf8-string? suffix 'URI-PREFIX)
- (string->absolute-uri (string-append prefix suffix))))
+ (guarantee ustring? suffix 'URI-PREFIX)
+ (string->absolute-uri (ustring-append prefix suffix))))
\f
(define (remove-dot-segments path)
;; At all times, (APPEND INPUT (REVERSE OUTPUT)) must be well
(if (pair? input)
(let ((segment (car input))
(input (cdr input)))
- (if (or (string=? segment "..")
- (string=? segment "."))
+ (if (or (ustring=? segment "..")
+ (ustring=? segment "."))
;; Rules A and D
(no-output input)
;; Rule E
(if (pair? input)
(let ((segment (car input))
(input (cdr input)))
- (cond ((string=? segment ".")
+ (cond ((ustring=? segment ".")
;; Rule B
(maybe-done input output))
- ((string=? segment "..")
+ ((ustring=? segment "..")
;; Rule C
(maybe-done input
(if (pair? (cdr output))
(do-string
(lambda (string)
(or (hash-table/get interned-uris string #f)
- (do-parse (utf8-string->wide-string string))))))
+ (do-parse string)))))
(cond ((uri? object)
(if (predicate object)
object
(begin
(if caller (error:bad-range-argument object caller))
#f)))
- ((string? object)
+ ((ustring? object)
(do-string object))
((symbol? object)
- (do-string (symbol-name object)))
- ((wide-string? object)
- (let ((string (string->utf8-string object)))
- (or (hash-table/get interned-uris string #f)
- (do-parse object))))
+ (do-string (symbol->string object)))
(else
(if caller (error:not-uri object caller))
#f))))
(%string->uri parse-relative-uri string start end 'STRING->RELATIVE-URI))
(define (%string->uri parser string start end caller)
- (or (and (string? string)
+ (or (and (ustring? string)
(default-object? start)
(default-object? end)
(hash-table/get interned-uris string #f))
\f
(define parser:hostport
(*parser
- (seq (map uri-string-downcase
+ (seq (map ustring-downcase
(alt (match matcher:ip-literal)
;; subsumed by MATCHER:REG-NAME
;;matcher:ipv4-address
(match (+ (char-set char-set:uri-digit)))))
(values #f)))))
-;; This is a kludge to work around fact that STRING-DOWNCASE only
-;; works on ISO 8859-1 strings, and we are using UTF-8 strings.
-
-(define (uri-string-downcase string)
- (call-with-utf8-output-string
- (lambda (output)
- (let ((input (open-utf8-input-string string)))
- (let loop ()
- (let ((char (read-char input)))
- (if (not (eof-object? char))
- (begin
- (write-char (char-downcase char) output)
- (loop)))))))))
-
(define matcher:ip-literal
(*matcher
(seq "["
(write-encoded segment char-set:uri-segment port))
(define (encode-uri-path-segment segment)
- (guarantee-string segment 'ENCODE-URI-PATH-SEGMENT)
(call-with-output-string
(lambda (port)
(write-segment segment port))))
(char-set char-set:uri-hex))))
(define (decode-component string)
- (if (string-find-next-char string #\%)
+ (if (ustring-find-first-char string #\%)
(call-with-output-string
(lambda (port)
- (let ((end (string-length string)))
+ (let ((end (ustring-length string)))
(let loop ((i 0))
(if (fix:< i end)
- (if (char=? (string-ref string i) #\%)
+ (if (char=? #\% (ustring-ref string i))
(begin
(write-char (integer->char
- (substring->number string
- (fix:+ i 1)
- (fix:+ i 3)
- 16
- #t))
+ (string->number string
+ 16
+ #t
+ (fix:+ i 1)
+ (fix:+ i 3)))
port)
(loop (fix:+ i 3)))
(begin
- (write-char (string-ref string i) port)
+ (write-char (ustring-ref string i) port)
(loop (fix:+ i 1)))))))))
string))
(define (write-encoded string unescaped port)
- (write-encoded-substring string 0 (string-length string) unescaped port))
+ (write-encoded-substring string 0 (ustring-length string) unescaped port))
(define (write-encoded-substring string start end unescaped port)
(do ((i start (fix:+ i 1)))
((not (fix:< i end)))
- (let ((char (string-ref string i)))
+ (let ((char (ustring-ref string i)))
(if (char-set-member? unescaped char)
(write-char char port)
(begin
(write-char #\% port)
(write-string (string-pad-left
- (string-upcase (number->string (char->integer char)
- 16))
+ (ustring-upcase (number->string (char->integer char)
+ 16))
2
#\0)
port))))))
(actions (cdr clause)))
`(,(cond ((eq? key 'EOF)
`(EOF-OBJECT? CHAR))
- ((fix:= (string-length (symbol-name key)) 1)
+ ((fix:= 1 (string-length (symbol-name key)))
`(CHAR=? CHAR ,(string-ref (symbol-name key) 0)))
(else
`(CHAR-SET-MEMBER? ,(symbol 'CHAR-SET:URI- key) CHAR)))
(set-predicate<=! utf32-string? ustring?)
(register-predicate! ->ustring-component? '->ustring-component))
+(define (make-ustring k #!optional char)
+ (guarantee index-fixnum? k 'make-ustring)
+ (if (fix:> k 0)
+ (make-utf32-string k char)
+ (make-legacy-string 0)))
+
(define (ustring-length string)
(cond ((legacy-string? string) (legacy-string-length string))
((utf32-string? string) (utf32-string-length string))
(else (error:not-a ustring? string 'ustring-set!))))
\f
(define (ustring-append . strings)
+ (%ustring-append* strings))
+
+(define (ustring-append* strings)
+ (guarantee list? strings 'ustring-append*)
+ (%ustring-append* strings))
+
+(define (%ustring-append* strings)
(let ((string
(do ((strings strings (cdr strings))
(n 0 (fix:+ n (ustring-length (car strings))))
(%ustring* objects 'ustring*))
(define (%ustring* objects caller)
- (apply ustring-append
- (map (lambda (object)
- (->ustring object caller))
- objects)))
+ (%ustring-append*
+ (map (lambda (object)
+ (->ustring object caller))
+ objects)))
(define (->ustring object caller)
(cond ((not object) "")
(define (string-for-primitive string)
(or (ustring->ascii string)
- (string->utf8 string)))
-
-;; temporary scaffolding
-(define (ustring->utf8-string string #!optional start end)
- (let* ((caller 'ustring->utf8-string)
- (end (fix:end-index end (ustring-length string) caller))
- (start (fix:start-index start end caller)))
- (cond ((legacy-string? string)
- (if (%legacy-string-ascii? string start end)
- (legacy-string-copy string start end)
- (%string->utf8-string string start end)))
- ((utf32-string? string)
- (if (%utf32-string-ascii? string start end)
- (%utf32-string->ascii string start end)
- (%string->utf8-string string start end)))
- (else
- (error:not-a ustring? string caller)))))
-
-(define (%string->utf8-string string start end)
- (object-new-type (ucode-type string) (string->utf8 string start end)))
-
-;; temporary scaffolding
-(define (utf8-string->ustring string #!optional start end)
- (utf8->string (legacy-string->bytevector string) start end))
\ No newline at end of file
+ (string->utf8 string)))
\ No newline at end of file
(*parser (map intern (match match-language))))
\f
(define (parse-string b)
- (let ((port (open-utf8-output-string)))
+ (let ((port (open-output-string)))
(define (loop)
(let ((p (get-parser-buffer-pointer b)))
(write-string (symbol-name lang) port)))))
(define (write-rdf/nt-literal-text text port)
- (let ((text (open-utf8-input-string text)))
+ (let ((text (open-input-string text)))
(write-string "\"" port)
(let loop ()
(let ((char (read-char text)))
(define-guarantee rdf-literal "RDF literal")
(define (make-rdf-literal text type)
- (guarantee utf8-string? text 'MAKE-RDF-LITERAL)
+ (guarantee ustring? text 'MAKE-RDF-LITERAL)
(let ((type
(if (or (not type)
(language? type))
(define (make-rdf-qname prefix local)
(guarantee-rdf-prefix prefix 'MAKE-RDF-QNAME)
- (guarantee utf8-string? local 'MAKE-RDF-QNAME)
- (if (not (*match-utf8-string match:name local))
+ (guarantee ustring? local 'MAKE-RDF-QNAME)
+ (if (not (*match-string match:name local))
(error:bad-range-argument local 'MAKE-RDF-QNAME))
(symbol prefix local))
(define (delimited-region-parser name start-delim end-delim
char-set parse-escapes)
(lambda (buffer)
- (let ((output (open-utf8-output-string))
+ (let ((output (open-output-string))
(start (get-parser-buffer-pointer buffer)))
(define (read-head)
(else #f))))
((rdf-bnode? o)
(and (not (inline-bnode o))
- (call-with-utf8-output-string
+ (call-with-output-string
(lambda (port)
(write-rdf/nt-bnode o port)))))
((uri? o)
- (call-with-utf8-output-string
+ (call-with-output-string
(lambda (port*)
(write-uri o (port/rdf-prefix-registry port) port*))))
((rdf-graph? o)
(and (null? (rdf-graph-triples o))
"{}"))
((rdf-literal? o)
- (call-with-utf8-output-string
+ (call-with-output-string
(lambda (port)
(write-rdf/turtle-literal o port))))
(else
(define (write-literal-text text port)
(if (string-find-next-char text #\newline)
- (let ((tport (open-utf8-input-string text)))
+ (let ((tport (open-input-string text)))
(write-string "\"\"\"" port)
(let loop ()
(let ((char (read-char tport)))
(define (string-matcher matcher)
(lambda (string #!optional start end)
- (matcher (utf8-string->parser-buffer string start end))))
+ (matcher (string->parser-buffer string start end))))
(define string-is-xml-qname? (string-matcher match:xml-qname))
(define string-is-xml-name? (string-matcher match:xml-name))
(set-coding xml port)
(write-xml-1 xml port options))))
-(define (xml->wide-string xml . options)
- (call-with-wide-output-string
- (lambda (port)
- (write-xml-1 xml port options))))
-
(define (set-coding xml port)
(if (port/supports-coding? port)
(let ((coding
(define (emit-string string ctx)
(let ((port (ctx-port ctx)))
- (for-each-unicode-char string
- (lambda (char)
- (write-char char port)))))
+ (ustring-for-each (lambda (char)
+ (write-char char port))
+ string)))
(define (emit-newline ctx)
(newline (ctx-port ctx)))
(define (xml-string-columns string)
(let ((n 0))
- (for-each-unicode-char string
- (lambda (char)
- (set! n
- (fix:+ n
- (case char
- ((#\") 6)
- ((#\<) 4)
- ((#\&) 5)
- (else 1))))
- unspecific))
+ (ustring-for-each (lambda (char)
+ (set! n
+ (fix:+ n
+ (case char
+ ((#\") 6)
+ ((#\<) 4)
+ ((#\&) 5)
+ (else 1))))
+ unspecific)
+ string)
n))
\f
(define (write-xml-name name ctx)
(emit-string (xml-name-string name) ctx))
(define (xml-name-columns name)
- (utf8-string-length (xml-name-string name)))
+ (ustring-length (xml-name-string name)))
(define (write-xml-nmtoken nmtoken ctx)
(emit-string (symbol-name nmtoken) ctx))
(emit-char #\space ctx)))
(define (write-escaped-string string escapes ctx)
- (for-each-unicode-char string
- (lambda (char)
- (cond ((assq char escapes)
- => (lambda (e)
- (emit-string (cdr e) ctx)))
- (((ctx-char-map ctx) char)
- => (lambda (name)
- (emit-char #\& ctx)
- (emit-string (symbol-name name) ctx)
- (emit-char #\; ctx)))
- (else
- (emit-char char ctx))))))
-
-(define (for-each-unicode-char string procedure)
- (let ((port (open-utf8-input-string string)))
- (let loop ()
- (let ((char (read-char port)))
- (if (not (eof-object? char))
- (begin
- (procedure char)
- (loop)))))))
\ No newline at end of file
+ (ustring-for-each (lambda (char)
+ (cond ((assq char escapes)
+ => (lambda (e)
+ (emit-string (cdr e) ctx)))
+ (((ctx-char-map ctx) char)
+ => (lambda (name)
+ (emit-char #\& ctx)
+ (emit-string (symbol-name name) ctx)
+ (emit-char #\; ctx)))
+ (else
+ (emit-char char ctx))))
+ string))
\ No newline at end of file
'ANY)
(guarantee-pi-handlers pi-handlers 'STRING->XML)))
-(define (utf8-string->xml string #!optional start end pi-handlers)
- (parse-xml (utf8-string->parser-buffer string start end)
- 'UTF-8
- (guarantee-pi-handlers pi-handlers 'UTF8-STRING->XML)))
-
(define (guarantee-pi-handlers object caller)
(if (default-object? object)
'()
(char->integer c))))
(prefix
(lambda (n)
- (wide-string (integer->char n))))
+ (ustring (integer->char n))))
(lose
(lambda bytes
(error "Illegal starting bytes:" bytes))))
(let ((char (integer->char n)))
(if (not (char-set-member? char-set:xml-char char))
(perror p "Disallowed Unicode character" char))
- (call-with-utf8-output-string
+ (call-with-output-string
(lambda (port)
(write-char char port))))))))
(*parser
;;;; Normalization
(define (normalize-attribute-value string)
- (call-with-utf8-output-string
+ (call-with-output-string
(lambda (port)
(let normalize-string ((string string))
- (let ((b (utf8-string->parser-buffer (normalize-line-endings string))))
+ (let ((b (string->parser-buffer (normalize-line-endings string))))
(let loop ()
(let* ((p (get-parser-buffer-pointer b))
(char (read-parser-buffer-char b)))
(loop))))))))))
(define (trim-attribute-whitespace string)
- (call-with-utf8-output-string
+ (call-with-output-string
(lambda (port)
(let ((string (string-trim string)))
(let ((end (string-length string)))
(let ((v
(expand-entity-value name p
(lambda ()
- (*parse-utf8-string parse-content string)))))
+ (*parse-string parse-content string)))))
(if (not v)
(perror p "Malformed entity reference" string))
v))
(string? (vector-ref v 0)))
(let ((v*
(fluid-let ((*external-expansion?* #t))
- (*parse-utf8-string parser (vector-ref v 0)))))
+ (*parse-string parser (vector-ref v 0)))))
(if (not v*)
(perror ptr
(string-append "Malformed " description)
'encode-value))))))
(define (encode-string string)
- (if (and (utf8-string-valid? string)
- (string-of-xml-chars? string))
+ (if (string-of-xml-chars? string)
string
(rpc-elt:base64
(call-with-output-string
(define (xml-char-data? object)
(or (unicode-char? object)
- (and (or (wide-string? object)
- (and (string? object)
- (utf8-string-valid? object)))
+ (and (ustring? object)
(string-of-xml-chars? object))))
(define (string-of-xml-chars? string)
- (for-all-chars-in-string? (char-set-predicate char-set:xml-char)
- string
- 0
- (string-length string)
- 'UTF-8))
+ (ustring-every (char-set-predicate char-set:xml-char) string))
(define (canonicalize-char-data object)
(cond ((unicode-char? object)
- (call-with-utf8-output-string
+ (call-with-output-string
(lambda (port)
(write-char object port))))
- ((wide-string? object)
- (string->utf8-string object))
- ((string? object)
- (cond ((not (utf8-string-valid? object))
- (error:wrong-type-datum object "valid UTF-8 XML char data"))
- ((not (string-of-xml-chars? object))
- (error:wrong-type-datum object "well-formed XML char data"))
- (else object)))
+ ((ustring? object)
+ (if (not (string-of-xml-chars? object))
+ (error:wrong-type-datum object "well-formed XML char data"))
+ object)
((uri? object)
(uri->string object))
- (else (error:wrong-type-datum object "an XML char data"))))
+ (else
+ (error:wrong-type-datum object "an XML char data"))))
(define-xml-type element
(name xml-name?)
(define (xml-comment . strings)
(make-xml-comment
- (let* ((s (apply string-append (map canonicalize-char-data strings)))
- (ws (utf8-string->wide-string s))
- (n (wide-string-length ws)))
+ (let* ((s (apply ustring-append (map canonicalize-char-data strings)))
+ (n (ustring-length s)))
(if (fix:> n 0)
- (string-append
- (if (char-whitespace? (wide-string-ref ws 0)) "" " ")
+ (ustring-append
+ (if (char-whitespace? (ustring-ref s 0)) "" " ")
s
- (if (char-whitespace? (wide-string-ref ws (fix:- n 1))) "" " "))
+ (if (char-whitespace? (ustring-ref s (fix:- n 1))) "" " "))
" "))))
(define (xml-stylesheet . items)
(make-xml-processing-instructions
'xml-stylesheet
- (call-with-utf8-output-string
+ (call-with-output-string
(lambda (port)
(for-each (lambda (attr)
(write-char #\space port)
read-xml
read-xml-file
string->xml
- utf8-string->xml
xml-processing-instructions-handlers)
(export (runtime xml)
coding-requires-bom?
(xml->string xml->octets)
write-xml
write-xml-file
- xml->octets
- xml->wide-string))
+ xml->octets))
(define-package (runtime xml html)
(files "xhtml" "xhtml-entities")
(xml-element-name (node-item node)))
(define-method node-string ((node <element-node>))
- (call-with-utf8-output-string
+ (call-with-output-string
(lambda (port)
(let loop ((node node))
(stream-for-each (lambda (child)