From 588acda14c19245e22a084544c7564606c7221c2 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sun, 30 Aug 2009 02:17:16 -0700 Subject: [PATCH] Refactor XML library to support names that don't conform to XML Namespaces. This was extensive mostly because there was a built-in assumption that all XML names could be mapped to QNames, which isn't true. Some incompatible changes: renamed: XML-NAME-QNAME ==> XML-NAME->SYMBOL eliminated: MAKE-XML-NAME-HASH-TABLE XML-NAME-HASH XML-NAME-QNAME=? XML-NMTOKEN-STRING XML-QNAME-STRING --- doc/ref-manual/io.texi | 20 +-- src/xdoc/xdoc.scm | 2 +- src/xml/xml-names.scm | 314 +++++++++++++++++++---------------------- src/xml/xml-output.scm | 2 +- src/xml/xml-parser.scm | 118 +++++++--------- src/xml/xml-struct.scm | 42 +++--- src/xml/xml.pkg | 43 +++--- src/xml/xpath.scm | 8 +- 8 files changed, 260 insertions(+), 289 deletions(-) diff --git a/doc/ref-manual/io.texi b/doc/ref-manual/io.texi index 84c7982fe..b0fd8fbb6 100644 --- a/doc/ref-manual/io.texi +++ b/doc/ref-manual/io.texi @@ -3054,8 +3054,8 @@ Returns @code{#t} if @var{object} is an @acronym{XML} name, and @code{#f} otherwise. @end deffn -@deffn procedure xml-name-qname xml-name -Returns the @dfn{qname} of @var{xml-name} as a symbol. +@deffn procedure xml-name->symbol xml-name +Returns the symbol part of @var{xml-name}. @end deffn @deffn procedure xml-name-uri xml-name @@ -3067,7 +3067,7 @@ Returns the @dfn{URI} of @var{xml-name}. The result always satisfies Returns the @dfn{qname} of @var{xml-name} as a string. Equivalent to @example -(symbol-name (xml-name-qname @var{xml-name})) +(symbol-name (xml-name->symbol @var{xml-name})) @end example @end deffn @@ -3118,11 +3118,6 @@ Returns @code{#t} if @var{object} is a qname, otherwise returns @code{#f}. @end deffn -@deffn procedure xml-qname-string qname -Returns a newly allocated string that is a copy of @var{qname}'s string. -Roughly equivalent to @code{symbol->utf8-string}. -@end deffn - @deffn procedure xml-qname-prefix qname Returns the prefix of @var{qname} as a symbol. @end deffn @@ -3180,9 +3175,6 @@ with the @samp{xmlns} prefix. @deffn procedure xml-nmtoken? object @end deffn -@deffn procedure xml-nmtoken-string xml-nmtoken -@end deffn - @deffn procedure string-is-xml-name? string @end deffn @@ -3191,12 +3183,6 @@ with the @samp{xmlns} prefix. @end deffn -@deffn procedure make-xml-name-hash-table [initial-size] -@end deffn - -@deffn procedure xml-name-hash xml-name modulus -@end deffn - @node XML Structure, , XML Names, XML Support @subsection XML Structure diff --git a/src/xdoc/xdoc.scm b/src/xdoc/xdoc.scm index f2adf3aac..58b07dcf6 100644 --- a/src/xdoc/xdoc.scm +++ b/src/xdoc/xdoc.scm @@ -375,7 +375,7 @@ USA. (hash-table/get html-generators (xdoc-element-name item) #f)) (define html-generators - (make-xml-name-hash-table)) + (make-eq-hash-table)) (define (generate-container-items items extra-content?) (generate-container-groups diff --git a/src/xml/xml-names.scm b/src/xml/xml-names.scm index 2037815c0..82de2a626 100644 --- a/src/xml/xml-names.scm +++ b/src/xml/xml-names.scm @@ -23,39 +23,36 @@ USA. |# -;;;; XML name structures +;;;; XML names (declare (usual-integrations)) -(define (make-xml-name qname #!optional uri) - (let ((qname (make-xml-qname qname)) - (uri-string - (cond ((default-object? uri) (null-xml-namespace-uri)) - ((string? uri) uri) - ((wide-string? uri) (wide-string->utf8-string uri)) - ((symbol? uri) (symbol-name uri)) - ((uri? uri) (uri->string uri)) - (else (error:not-uri uri 'MAKE-XML-NAME))))) - (string->uri uri-string) ;signals error if not URI - (if (string-null? uri-string) - qname +(define (make-xml-name name #!optional uri) + (let ((name-symbol (make-xml-name-symbol name)) + (uri + (if (default-object? uri) + (null-xml-namespace-uri) + (->absolute-uri uri 'MAKE-XML-NAME)))) + (if (null-xml-namespace-uri? uri) + name-symbol (begin - (if (not (case (xml-qname-prefix qname) - ((xml) (string=? uri-string xml-uri-string)) - ((xmlns) (string=? uri-string xmlns-uri-string)) + (guarantee-xml-qname name-symbol 'MAKE-XML-NAME) + (if (not (case (xml-qname-prefix name-symbol) + ((xml) (uri=? uri xml-uri)) + ((xmlns) (uri=? uri xmlns-uri)) (else #t))) - (error:bad-range-argument uri-string 'MAKE-XML-NAME)) - (%make-xml-name qname uri-string))))) + (error:bad-range-argument uri 'MAKE-XML-NAME)) + (%make-xml-name name-symbol uri))))) -(define (%make-xml-name qname uri-string) +(define (%make-xml-name qname uri) (let ((uname (let ((local (xml-qname-local qname))) (hash-table/intern! (hash-table/intern! expanded-names - uri-string + uri make-eq-hash-table) local (lambda () - (make-expanded-name uri-string + (make-expanded-name uri local (make-eq-hash-table))))))) (hash-table/intern! (expanded-name-combos uname) @@ -63,111 +60,137 @@ USA. (lambda () (make-combo-name qname uname))))) (define expanded-names - (make-string-hash-table)) + (make-eq-hash-table)) (define (xml-name? object) - (or (xml-qname? object) + (or (xml-name-symbol? object) (combo-name? object))) (define-guarantee xml-name "an XML Name") -(define (null-xml-namespace-uri? object) - (and (uri? object) - (uri=? object null-namespace-uri))) +(define (xml-name-string name) + (symbol-name (xml-name->symbol name))) -(define (null-xml-namespace-uri) - null-namespace-uri) +(define (xml-name->symbol name) + (cond ((xml-name-symbol? name) name) + ((combo-name? name) (combo-name-qname name)) + (else (error:not-xml-name name 'XML-NAME->SYMBOL)))) -(define null-namespace-uri (->uri "")) -(define xml-uri-string "http://www.w3.org/XML/1998/namespace") -(define xml-uri (->uri xml-uri-string)) -(define xmlns-uri-string "http://www.w3.org/2000/xmlns/") -(define xmlns-uri (->uri xmlns-uri-string)) +(define (xml-name=? n1 n2) + (if (and (combo-name? n1) (combo-name? n2)) + (eq? (combo-name-expanded n1) (combo-name-expanded n2)) + (eq? (xml-name->symbol n1) (xml-name->symbol n2)))) + +(define-record-type + (make-combo-name qname expanded) + combo-name? + (qname combo-name-qname) + (expanded combo-name-expanded)) + +(set-record-type-unparser-method! + (standard-unparser-method 'XML-NAME + (lambda (name port) + (write-char #\space port) + (write (combo-name-qname name) port)))) + +(define-record-type + (make-expanded-name uri local combos) + expanded-name? + (uri expanded-name-uri) + (local expanded-name-local) + (combos expanded-name-combos)) -(define (make-xml-nmtoken object) - (if (string? object) - (begin - (if (not (string-is-xml-nmtoken? object)) - (error:bad-range-argument object 'MAKE-XML-NMTOKEN)) - (utf8-string->symbol object)) - (begin - (guarantee-xml-nmtoken object 'MAKE-XML-NMTOKEN) - object))) - -(define (xml-nmtoken? object) - (and (symbol? object) - (string-is-xml-nmtoken? (symbol-name object)))) +;;;; Symbolic names + +(define (name-matcher initial subsequent) + (lambda (buffer) + (and (match-parser-buffer-char-in-alphabet buffer initial) + (let loop () + (if (match-parser-buffer-char-in-alphabet buffer subsequent) + (loop) + #t))))) + +(define match-ncname + (name-matcher alphabet:ncname-initial + alphabet:ncname-subsequent)) + +(define match:xml-name + (name-matcher alphabet:name-initial + alphabet:name-subsequent)) + +(define match:xml-nmtoken + (name-matcher alphabet:name-subsequent + alphabet:name-subsequent)) + +(define match:xml-qname + (*matcher (seq match-ncname (? (seq ":" match-ncname))))) + +(define (string-matcher matcher) + (lambda (string #!optional start end) + (matcher (utf8-string->parser-buffer string start end)))) + +(define string-is-xml-qname? (string-matcher match:xml-qname)) +(define string-is-xml-name? (string-matcher match:xml-name)) +(define string-is-xml-nmtoken? (string-matcher match:xml-nmtoken)) + +(define (name-constructor string-predicate constructor) + (lambda (object) + (if (string? object) + (begin + (if (not (string-predicate object)) + (error:bad-range-argument object constructor)) + (utf8-string->symbol object)) + (begin + (guarantee-symbol object constructor) + (if (not (string-predicate (symbol-name object))) + (error:bad-range-argument object constructor)) + object)))) -(define-guarantee xml-nmtoken "an XML name token") +(define make-xml-name-symbol + (name-constructor string-is-xml-name? 'MAKE-XML-NAME-SYMBOL)) -(define (xml-nmtoken-string nmtoken) - (guarantee-xml-nmtoken nmtoken 'XML-NMTOKEN-STRING) - (symbol-name nmtoken)) +(define make-xml-nmtoken + (name-constructor string-is-xml-nmtoken? 'MAKE-XML-NMTOKEN)) -(define (string-is-xml-qname? string) - (let ((end (string-length string))) - (let ((c (substring-find-next-char string 0 end #\:))) - (if c - (and (not (substring-find-next-char string (fix:+ c 1) end #\:)) - (string-is-xml-name? string 0 c) - (string-is-xml-name? string (fix:+ c 1) end)) - (string-is-xml-name? string 0 end))))) - -(define (string-is-xml-name? string #!optional start end) - (eq? (string-is-xml-nmtoken? string start end) 'NAME)) - -(define (string-is-xml-nmtoken? string #!optional start end) - (let ((buffer (utf8-string->parser-buffer string start end))) - (letrec - ((match-tail - (lambda () - (if (peek-parser-buffer-char buffer) - (and (match-parser-buffer-char-in-alphabet - buffer alphabet:name-subsequent) - (match-tail)) - #t)))) - (if (match-parser-buffer-char-in-alphabet buffer alphabet:name-initial) - (and (match-tail) - 'NAME) - (and (match-parser-buffer-char-in-alphabet buffer - alphabet:name-subsequent) - (match-tail) - 'NMTOKEN))))) - -(define (string-composed-of? string char-set) - (and (string? string) - (substring-composed-of? string 0 (string-length string) char-set))) - -(define (substring-composed-of? string start end char-set) - (let loop ((index start)) - (or (fix:= index end) - (and (char-set-member? char-set (string-ref string index)) - (loop (fix:+ index 1)))))) +(define make-xml-qname + (name-constructor string-is-xml-qname? 'MAKE-XML-QNAME)) + +(define (name-predicate string-predicate) + (lambda (object) + (and (symbol? object) + (string-predicate (symbol-name object))))) + +(define xml-name-symbol? (name-predicate string-is-xml-name?)) +(define xml-nmtoken? (name-predicate string-is-xml-nmtoken?)) +(define xml-qname? (name-predicate string-is-xml-qname?)) + +(define-guarantee xml-name-symbol "an XML name symbol") +(define-guarantee xml-nmtoken "an XML name token") +(define-guarantee xml-qname "an XML QName") -(define (xml-name-string name) - (symbol-name (xml-name-qname name))) +;;;; Namespace support -(define (xml-name-qname name) - (cond ((xml-qname? name) name) - ((combo-name? name) (combo-name-qname name)) - (else (error:not-xml-name name 'XML-NAME-QNAME)))) +(define (xml-namespace-conformant-name? object) + (or (xml-qname? object) + (combo-name? object))) -(define (xml-name-qname=? name qname) - (eq? (xml-name-qname name) qname)) +(define-guarantee xml-namespace-conformant-name + "XML Namespaces conformant name") (define (xml-name-uri name) - (cond ((xml-qname? name) "") + (cond ((xml-qname? name) (null-xml-namespace-uri)) ((combo-name? name) (expanded-name-uri (combo-name-expanded name))) - (else (error:not-xml-name name 'XML-NAME-URI)))) + (else (error:not-xml-namespace-conformant-name name 'XML-NAME-URI)))) (define (xml-name-uri=? name uri) (uri=? (xml-name-uri name) uri)) (define (xml-name-prefix name) - (xml-qname-prefix + (%xml-qname-prefix (cond ((xml-qname? name) name) ((combo-name? name) (combo-name-qname name)) - (else (error:not-xml-name name 'XML-NAME-PREFIX))))) + (else + (error:not-xml-namespace-conformant-name name 'XML-NAME-PREFIX))))) (define (null-xml-name-prefix? object) (eq? object '||)) @@ -179,85 +202,44 @@ USA. (eq? (xml-name-prefix name) prefix)) (define (xml-name-local name) - (cond ((xml-qname? name) (xml-qname-local name)) + (cond ((xml-qname? name) (%xml-qname-local name)) ((combo-name? name) (expanded-name-local (combo-name-expanded name))) - (else (error:not-xml-name name 'XML-NAME-LOCAL)))) + (else (error:not-xml-namespace-conformant-name name 'XML-NAME-LOCAL)))) (define (xml-name-local=? name local) (eq? (xml-name-local name) local)) -(define (xml-name=? n1 n2) - (let ((lose (lambda (n) (error:not-xml-name n 'XML-NAME=?)))) - (cond ((xml-qname? n1) - (cond ((xml-qname? n2) (eq? n1 n2)) - ((combo-name? n2) (eq? n1 (combo-name-qname n2))) - (else (lose n2)))) - ((combo-name? n1) - (cond ((xml-qname? n2) - (eq? (combo-name-qname n1) n2)) - ((combo-name? n2) - (eq? (combo-name-expanded n1) - (combo-name-expanded n2))) - (else (lose n2)))) - (else (lose n1))))) - -(define (xml-name-hash name modulus) - (eq-hash-mod (xml-name-local name) modulus)) - -(define make-xml-name-hash-table - (strong-hash-table/constructor xml-name-hash xml-name=? #t)) - -(define (make-xml-qname object) - (if (string? object) - (begin - (if (not (string-is-xml-qname? object)) - (error:bad-range-argument object 'MAKE-XML-QNAME)) - (utf8-string->symbol object)) - (begin - (guarantee-xml-qname object 'MAKE-XML-QNAME) - object))) - -(define (xml-qname? object) - (and (interned-symbol? object) - (string-is-xml-qname? (symbol-name object)))) - -(define-guarantee xml-qname "an XML QName") +(define (null-xml-namespace-uri? object) + (and (uri? object) + (uri=? object null-namespace-uri))) -(define (xml-qname-string qname) - (guarantee-xml-qname qname 'XML-QNAME-STRING) - (symbol->utf8-string qname)) +(define (null-xml-namespace-uri) + null-namespace-uri) -(define (xml-qname-local qname) - (guarantee-xml-qname qname 'XML-QNAME-LOCAL) - (let ((s (symbol-name qname))) - (let ((c (string-find-next-char s #\:))) - (if c - (utf8-string->symbol (string-tail s (fix:+ c 1))) - qname)))) +(define null-namespace-uri (->uri "")) +(define xml-uri-string "http://www.w3.org/XML/1998/namespace") +(define xml-uri (->uri xml-uri-string)) +(define xmlns-uri-string "http://www.w3.org/2000/xmlns/") +(define xmlns-uri (->uri xmlns-uri-string)) (define (xml-qname-prefix qname) (guarantee-xml-qname qname 'XML-QNAME-PREFIX) + (%xml-qname-prefix qname)) + +(define (%xml-qname-prefix qname) (let ((s (symbol-name qname))) (let ((c (string-find-next-char s #\:))) (if c (utf8-string->symbol (string-head s c)) (null-xml-name-prefix))))) -(define-record-type - (make-combo-name qname expanded) - combo-name? - (qname combo-name-qname) - (expanded combo-name-expanded)) - -(set-record-type-unparser-method! - (standard-unparser-method 'XML-NAME - (lambda (name port) - (write-char #\space port) - (write (combo-name-qname name) port)))) +(define (xml-qname-local qname) + (guarantee-xml-qname qname 'XML-QNAME-LOCAL) + (%xml-qname-local qname)) -(define-record-type - (make-expanded-name uri local combos) - expanded-name? - (uri expanded-name-uri) - (local expanded-name-local) - (combos expanded-name-combos)) \ No newline at end of file +(define (%xml-qname-local qname) + (let ((s (symbol-name qname))) + (let ((c (string-find-next-char s #\:))) + (if c + (utf8-string->symbol (string-tail s (fix:+ c 1))) + qname)))) \ No newline at end of file diff --git a/src/xml/xml-output.scm b/src/xml/xml-output.scm index 925348216..c9ab0c403 100644 --- a/src/xml/xml-output.scm +++ b/src/xml/xml-output.scm @@ -432,7 +432,7 @@ USA. (utf8-string-length (xml-name-string name))) (define (write-xml-nmtoken nmtoken ctx) - (emit-string (xml-nmtoken-string nmtoken) ctx)) + (emit-string (symbol-name nmtoken) ctx)) (define (write-entity-value value col ctx) (if (xml-external-id? value) diff --git a/src/xml/xml-parser.scm b/src/xml/xml-parser.scm index 7c22d30c6..dd44c0afb 100644 --- a/src/xml/xml-parser.scm +++ b/src/xml/xml-parser.scm @@ -388,7 +388,7 @@ USA. (if (there-exists? (cdr attrs) (lambda (attr) (xml-name=? (xml-attribute-name attr) name))) - (perror p "Attributes with same name" (xml-name-qname name))))))) + (perror p "Attributes with same name" (xml-name->symbol name))))))) (define (parse-element-content b p name) (let ((vc (parse-content b))) @@ -399,8 +399,8 @@ USA. (if (peek-parser-buffer-char b) (perror (get-parser-buffer-pointer b) "Unknown content") (perror p "Unterminated start tag" name))) - (if (not (eq? (xml-name-qname (vector-ref ve 0)) - (xml-name-qname name))) + (if (not (eq? (xml-name->symbol (vector-ref ve 0)) + (xml-name->symbol name))) (perror p "Mismatched start tag" (vector-ref ve 0) name)) (let ((content (coalesce-strings! (vector->list vc)))) (if (null? content) @@ -429,12 +429,12 @@ USA. ;;;; Attribute defaulting -(define (process-attr-decls qname attrs p) +(define (process-attr-decls name attrs p) (let ((decl (and (or *standalone?* *internal-dtd?*) (find-matching-item *att-decls* (lambda (decl) - (xml-name=? (xml-!attlist-name decl) qname)))))) + (xml-name=? (xml-!attlist-name decl) name)))))) (if decl (do ((defns (xml-!attlist-definitions decl) (cdr defns)) (attrs attrs (process-attr-defn (car defns) attrs p))) @@ -442,27 +442,27 @@ USA. attrs))) (define (process-attr-defn defn attrs p) - (let ((qname (car defn)) + (let ((name (car defn)) (type (cadr defn)) (default (caddr defn))) (let ((attr (find-matching-item attrs (lambda (attr) - (xml-name=? (car (xml-attribute-name attr)) qname))))) + (xml-name=? (car (xml-attribute-name attr)) name))))) (if attr (let ((av (xml-attribute-value attr))) (if (and (pair? default) (eq? (car default) '|#FIXED|) (not (string=? av (cdr default)))) - (perror (cdar attr) "Incorrect attribute value" qname)) + (perror (cdar attr) "Incorrect attribute value" name)) (if (not (eq? type '|CDATA|)) (set-xml-attribute-value! attr (trim-attribute-whitespace av))) attrs) (begin (if (eq? default '|#REQUIRED|) - (perror p "Missing required attribute value" qname)) + (perror p "Missing required attribute value" name)) (if (pair? default) - (cons (%make-xml-attribute (cons qname p) (cdr default)) attrs) + (cons (%make-xml-attribute (cons name p) (cdr default)) attrs) attrs)))))) ;;;; Other markup @@ -535,42 +535,21 @@ USA. (define parse-unexpanded-name ;[5] (*parser (with-pointer p - (map (lambda (s) (cons (make-xml-qname s) p)) - (match match-qname))))) + (map (lambda (s) (cons (make-xml-name s) p)) + (match match:xml-name))))) (define (simple-name-parser type) (let ((m (string-append "Malformed " type " name"))) - (*parser (require-success m (map make-xml-qname (match match-ncname)))))) + (*parser (require-success m (map make-xml-name (match match:xml-name)))))) (define parse-entity-name (simple-name-parser "entity")) (define parse-pi-name (simple-name-parser "processing-instructions")) (define parse-notation-name (simple-name-parser "notation")) -(define match-qname - (*matcher - (seq match-ncname - (? (seq ":" match-ncname))))) - -(define (match-ncname buffer) - (and (match-parser-buffer-char-in-alphabet buffer alphabet:ncname-initial) - (let loop () - (if (match-parser-buffer-char-in-alphabet buffer - alphabet:ncname-subsequent) - (loop) - #t)))) - -(define parse-required-name-token ;[7] +(define parse-required-nmtoken ;[7] (*parser (require-success "Malformed XML name token" - (map make-xml-nmtoken (match match-name-token))))) - -(define (match-name-token buffer) - (and (match-parser-buffer-char-in-alphabet buffer alphabet:name-subsequent) - (let loop () - (if (match-parser-buffer-char-in-alphabet buffer - alphabet:name-subsequent) - (loop) - #t)))) + (map make-xml-nmtoken (match match:xml-nmtoken))))) ;;;; Namespaces @@ -581,28 +560,30 @@ USA. (let ((uname (xml-attribute-name (car attrs))) (value (xml-attribute-value (car attrs))) (tail (loop (cdr attrs)))) - (let ((qname (car uname)) + (let ((name (car uname)) (p (cdr uname))) (let ((forbidden-uri (lambda () (perror p "Forbidden namespace URI" value)))) - (cond ((xml-name=? qname 'xmlns) - (string->uri value) ;signals error if not URI - (if (or (string=? value xml-uri-string) - (string=? value xmlns-uri-string)) - (forbidden-uri)) - (cons (cons (null-xml-name-prefix) value) tail)) - ((xml-name-prefix=? qname 'xmlns) - (if (xml-name=? qname 'xmlns:xmlns) - (perror p "Illegal namespace prefix" qname)) + (cond ((xml-name=? name 'xmlns) + (let ((uri (string->absolute-uri value))) + (if (or (uri=? value xml-uri) + (uri=? value xmlns-uri)) + (forbidden-uri)) + (cons (cons (null-xml-name-prefix) uri) + tail))) + ((and (xml-qname? name) + (xml-name-prefix=? name 'xmlns)) + (if (xml-name=? name 'xmlns:xmlns) + (perror p "Illegal namespace prefix" name)) (string->uri value) ;signals error if not URI - (if (if (xml-name=? qname 'xmlns:xml) + (if (if (xml-name=? name 'xmlns:xml) (not (string=? value xml-uri-string)) (or (string-null? value) (string=? value xml-uri-string) (string=? value xmlns-uri-string))) (forbidden-uri)) - (cons (cons (xml-name-local qname) value) tail)) + (cons (cons (xml-name-local name) value) tail)) (else tail))))) *prefix-bindings*))) unspecific) @@ -611,24 +592,25 @@ USA. (define (expand-attribute-name uname) (expand-name uname #t)) (define (expand-name uname attribute-name?) - (let ((qname (car uname)) + (let ((name (car uname)) (p (cdr uname))) - (if *in-dtd?* - qname - (let ((string (lookup-namespace-prefix qname p attribute-name?))) - (if (string-null? string) - qname - (%make-xml-name qname string)))))) + (if (or *in-dtd?* + (not (xml-qname? name))) + name + (let ((uri (lookup-namespace-prefix name p attribute-name?))) + (if (null-xml-namespace-uri? uri) + name + (%make-xml-name name uri)))))) (define (lookup-namespace-prefix qname p attribute-name?) (let ((prefix (xml-qname-prefix qname))) (cond ((eq? prefix 'xmlns) - xmlns-uri-string) + xmlns-uri) ((eq? prefix 'xml) - xml-uri-string) + xml-uri) ((and attribute-name? (null-xml-name-prefix? prefix)) - "") + (null-xml-namespace-uri)) (else (let ((entry (assq prefix *prefix-bindings*))) (if entry @@ -636,7 +618,7 @@ USA. (begin (if (not (null-xml-name-prefix? prefix)) (perror p "Undeclared XML prefix" prefix)) - ""))))))) + (null-xml-namespace-uri)))))))) ;;;; Processing instructions @@ -730,7 +712,7 @@ USA. (alt (seq "#" (alt match-decimal (seq "x" match-hexadecimal))) - match-qname) + match:xml-name) ";")))) (define parse-entity-reference-name ;[68] @@ -739,7 +721,7 @@ USA. parse-entity-name))) (define parse-entity-reference-deferred - (*parser (match (seq "&" match-qname ";")))) + (*parser (match (seq "&" match:xml-name ";")))) (define parse-parameter-entity-reference-name ;[69] (*parser @@ -789,7 +771,7 @@ USA. (lambda (a) (car a)))) (define parse-declaration-attributes - (attribute-list-parser (*parser (map make-xml-qname (match match-qname))) + (attribute-list-parser (*parser (map make-xml-name (match match:xml-name))) (lambda (a) a))) (define (attribute-value-parser alphabet parse-reference) @@ -1146,8 +1128,8 @@ USA. parse-required-element-name S ;;[46] - (alt (map make-xml-qname (match "EMPTY")) - (map make-xml-qname (match "ANY")) + (alt (map make-xml-name (match "EMPTY")) + (map make-xml-name (match "ANY")) ;;[51] (encapsulate vector->list (with-pointer p @@ -1197,14 +1179,14 @@ USA. (define parse-!attlist-type ;[54,57] (*parser - (alt (map make-xml-qname + (alt (map make-xml-name ;;[55,56] (match (alt "CDATA" "IDREFS" "IDREF" "ID" "ENTITY" "ENTITIES" "NMTOKENS" "NMTOKEN"))) ;;[58] (encapsulate vector->list (bracket "notation type" - (seq (map make-xml-qname (match "NOTATION")) + (seq (map make-xml-name (match "NOTATION")) S "(") ")" @@ -1216,8 +1198,8 @@ USA. (encapsulate (lambda (v) (cons 'enumerated (vector->list v))) (sbracket "enumerated type" "(" ")" S? - parse-required-name-token - (* (seq S? "|" S? parse-required-name-token)) + parse-required-nmtoken + (* (seq S? "|" S? parse-required-nmtoken)) S?))))) (define parse-!attlist-default ;[60] diff --git a/src/xml/xml-struct.scm b/src/xml/xml-struct.scm index e6db02d52..4d554d961 100644 --- a/src/xml/xml-struct.scm +++ b/src/xml/xml-struct.scm @@ -121,6 +121,16 @@ USA. (define (xml-whitespace-string? object) (string-composed-of? object char-set:xml-whitespace)) +(define (string-composed-of? string char-set) + (and (string? string) + (substring-composed-of? string 0 (string-length string) char-set))) + +(define (substring-composed-of? string start end char-set) + (let loop ((index start)) + (or (fix:= index end) + (and (char-set-member? char-set (string-ref string index)) + (loop (fix:+ index 1)))))) + (define-xml-type declaration (version xml-version?) (encoding xml-encoding?) @@ -288,12 +298,12 @@ USA. (define-xml-type processing-instructions (name (lambda (object) - (and (xml-qname? object) + (and (xml-name-symbol? object) (not (xml-name=? object 'xml))))) (text canonicalize canonicalize-char-data)) (define-xml-type dtd - (root xml-qname?) + (root xml-name-symbol?) (external (lambda (object) (or (not object) (xml-external-id? object)))) @@ -326,14 +336,14 @@ USA. (string->char-set " \r\n-'()+,./:=?;!*#@$_%"))) (define-xml-type !element - (name xml-qname?) + (name xml-name-symbol?) (content-type (lambda (object) (or (eq? object '|EMPTY|) (eq? object '|ANY|) (and (pair? object) (eq? '|#PCDATA| (car object)) - (list-of-type? (cdr object) xml-qname?)) + (list-of-type? (cdr object) xml-name-symbol?)) (letrec ((children? (lambda (object) @@ -345,7 +355,7 @@ USA. (list-of-type? (cdr object) cp?)))))) (cp? (lambda (object) - (or (maybe-wrapped object xml-qname?) + (or (maybe-wrapped object xml-name-symbol?) (children? object)))) (maybe-wrapped (lambda (object pred) @@ -360,13 +370,13 @@ USA. (children? object)))))) (define-xml-type !attlist - (name xml-qname?) + (name xml-name-symbol?) (definitions canonicalize (lambda (object) (if (not (list-of-type? object (lambda (item) (and (pair? item) - (xml-qname? (car item)) + (xml-name-symbol? (car item)) (pair? (cdr item)) (!attlist-type? (cadr item)) (pair? (cddr item)) @@ -393,7 +403,7 @@ USA. (eq? object '|NMTOKEN|) (and (pair? object) (or (and (eq? (car object) '|NOTATION|) - (list-of-type? (cdr object) xml-qname?)) + (list-of-type? (cdr object) xml-name-symbol?)) (and (eq? (car object) 'enumerated) (list-of-type? (cdr object) xml-nmtoken?)))))) @@ -406,16 +416,16 @@ USA. (xml-char-data? (cdr object))))) (define-xml-type !entity - (name xml-qname?) + (name xml-name-symbol?) (value canonicalize canonicalize-entity-value)) (define-xml-type unparsed-!entity - (name xml-qname?) + (name xml-name-symbol?) (id xml-external-id?) - (notation xml-qname?)) + (notation xml-name-symbol?)) (define-xml-type parameter-!entity - (name xml-qname?) + (name xml-name-symbol?) (value canonicalize canonicalize-entity-value)) (define (canonicalize-entity-value object) @@ -432,14 +442,14 @@ USA. (canonicalize-content object)))) (define-xml-type !notation - (name xml-qname?) + (name xml-name-symbol?) (id xml-external-id?)) (define-xml-type entity-ref - (name xml-qname?)) + (name xml-name-symbol?)) (define-xml-type parameter-entity-ref - (name xml-qname?)) + (name xml-name-symbol?)) (define-syntax define-xml-printer (sc-macro-transformer @@ -468,7 +478,7 @@ USA. (define-xml-printer element (lambda (elt) - (xml-name-qname (xml-element-name elt)))) + (xml-name->symbol (xml-element-name elt)))) (define-xml-printer external-id (lambda (dtd) diff --git a/src/xml/xml.pkg b/src/xml/xml.pkg index 18c56b100..feaf593b1 100644 --- a/src/xml/xml.pkg +++ b/src/xml/xml.pkg @@ -32,52 +32,69 @@ USA. (define-package (runtime xml) (parent (runtime))) +(define-package (runtime xml chars) + (files "xml-chars") + (parent (runtime xml)) + (export () + alphabet:xml-char + char-set:xml-whitespace) + (export (runtime xml) + alphabet:char-data + alphabet:name-initial + alphabet:name-subsequent + alphabet:ncname-initial + alphabet:ncname-subsequent)) + (define-package (runtime xml names) (files "xml-names") (parent (runtime xml)) (export () error:not-xml-name + error:not-xml-name-symbol + error:not-xml-namespace-conformant-name error:not-xml-nmtoken error:not-xml-qname guarantee-xml-name + guarantee-xml-name-symbol + guarantee-xml-namespace-conformant-name guarantee-xml-nmtoken guarantee-xml-qname make-xml-name - make-xml-name-hash-table + make-xml-name-symbol make-xml-nmtoken make-xml-qname + match:xml-name + match:xml-nmtoken + match:xml-qname null-xml-name-prefix null-xml-name-prefix? null-xml-namespace-uri null-xml-namespace-uri? string-is-xml-name? string-is-xml-nmtoken? - xml-name-hash + string-is-xml-qname? + xml-name->symbol xml-name-local xml-name-local=? xml-name-prefix xml-name-prefix=? - xml-name-qname - xml-name-qname=? xml-name-string + xml-name-symbol? xml-name-uri xml-name-uri=? xml-name=? xml-name? - xml-nmtoken-string + xml-namespace-conformant-name? xml-nmtoken? xml-qname-local xml-qname-prefix - xml-qname-string xml-qname? xml-uri xml-uri-string xmlns-uri xmlns-uri-string) (export (runtime xml) - %make-xml-name - string-composed-of? - substring-composed-of?)) + %make-xml-name)) (define-package (runtime xml structure) (files "xml-struct") @@ -280,21 +297,15 @@ USA. string-of-xml-chars?)) (define-package (runtime xml parser) - (files "xml-chars" "xml-parser") + (files "xml-parser") (parent (runtime xml)) (export () - alphabet:xml-char - char-set:xml-whitespace read-xml read-xml-file string->xml utf8-string->xml xml-processing-instructions-handlers) (export (runtime xml) - alphabet:name-initial - alphabet:name-subsequent - alphabet:ncname-initial - alphabet:ncname-subsequent coding-requires-bom? normalize-coding)) diff --git a/src/xml/xpath.scm b/src/xml/xpath.scm index 4d64ad72d..4b2ee855e 100644 --- a/src/xml/xpath.scm +++ b/src/xml/xpath.scm @@ -206,7 +206,7 @@ USA. (define-method node-name ((node )) (let ((name (xml-attribute-name (node-item node)))) - (if (xml-name-qname=? name 'xmlns) + (if (eq? (xml-name->symbol name) 'xmlns) (null-xml-name-prefix) (xml-name-local name)))) @@ -317,12 +317,12 @@ USA. (let per-decl ((decls (node-ns-decls node)) (seen seen)) (if (pair? decls) (let ((decl (car decls))) - (let ((qname (xml-name-qname (xml-attribute-name decl)))) - (if (memq qname seen) + (let ((aname (xml-name->symbol (xml-attribute-name decl)))) + (if (memq aname seen) (per-decl (force (cdr decls)) seen) (cons-stream decl (per-decl (force (cdr decls)) - (cons qname seen)))))) + (cons aname seen)))))) (let ((parent (parent-node node))) (if parent (per-node parent seen) -- 2.25.1