#| -*-Scheme-*-
-$Id: xml-parser.scm,v 1.28 2003/07/27 03:38:15 cph Exp $
+$Id: xml-parser.scm,v 1.29 2003/07/30 19:44:02 cph Exp $
Copyright 2001,2002,2003 Massachusetts Institute of Technology
|#
+;; **** Namespace notes: ****
+;;
+;; * Namespace declarations may appear in !ATTLIST default values, and
+;; must be processed when these declarations are in an internal DTD.
+;;
+;; * In general, default attribute values in an internal DTD must be
+;; handled by adding appropriate attributes to the corresponding
+;; elements.
+;;
+;; * DEREFERENCE-ENTITY seems to be expanding content refs wrong. (???)
+
;;;; XML parser
;;; Comments of the form [N] refer to production rules in the XML 1.0
(fluid-let ((*general-entities* (predefined-entities))
(*standalone?*)
(*internal-dtd?* #t)
- (*pi-handlers* pi-handlers))
+ (*pi-handlers* pi-handlers)
+ (*in-dtd?* #f)
+ (*prefix-bindings* '())
+ (*attlists* '()))
(let ((declaration (one-value (parse-declaration buffer))))
(set! *standalone?*
(and declaration
(equal? (xml-declaration-standalone declaration)
"yes")))
(let* ((misc-1 (one-value (parse-misc buffer)))
- (dtd (one-value (parse-dtd buffer)))
+ (dtd
+ (one-value
+ (fluid-let ((*in-dtd?* #t))
+ (parse-dtd buffer))))
(misc-2 (if dtd (one-value (parse-misc buffer)) '()))
(element
(or (one-value (parse-element buffer))
(define *standalone?*)
(define *internal-dtd?*)
(define *pi-handlers*)
+(define *in-dtd?*)
+(define *prefix-bindings*)
+(define *attlists*)
(define parse-misc ;[27]
(*parser
(lambda (v)
(transform-declaration (vector-ref v 0) text-decl? p))
(sbracket description "<?xml" "?>"
- parse-attribute-list))))))
+ parse-declaration-attributes))))))
(define parse-declaration ;[23,24,32,80]
(xml-declaration-parser "XML declaration" #f))
(define (parse-element buffer) ;[39]
(let ((p (get-parser-buffer-pointer buffer)))
- (let ((v (parse-start-tag buffer)))
- (and v
- (vector
- (make-xml-element
- (vector-ref v 0)
- (vector-ref v 1)
- (if (string=? (vector-ref v 2) ">")
- (let loop ((elements '#()))
- (let ((v* (parse-end-tag buffer)))
- (if v*
- (begin
- (if (not (eq? (vector-ref v 0) (vector-ref v* 0)))
- (perror p "Mismatched start tag"
- (vector-ref v 0) (vector-ref v* 0)))
- (let ((contents
- (coalesce-strings!
- (delete-matching-items!
- (vector->list elements)
- (lambda (element)
- (and (string? element)
- (string-null? element)))))))
- (if (null? contents)
- ;; Preserve fact that this element
- ;; was formed by a start/end tag pair
- ;; rather than by an empty-element
- ;; tag.
- (list "")
- contents)))
- (let ((v* (parse-content buffer)))
- (if (not v*)
- (perror p "Unterminated start tag"
- (vector-ref v 0)))
- (if (equal? v* '#(""))
- (perror p "Unknown content"))
- (loop (vector-append elements v*))))))
- '())))))))
+ (fluid-let ((*prefix-bindings* *prefix-bindings*))
+ (let ((v (parse-start-tag buffer)))
+ (and v
+ (vector
+ (make-xml-element
+ (vector-ref v 0)
+ (vector-ref v 1)
+ (if (string=? (vector-ref v 2) ">")
+ (let loop ((elements '#()))
+ (let ((v* (parse-end-tag buffer)))
+ (if v*
+ (begin
+ (if (not (eq? (vector-ref v 0) (vector-ref v* 0)))
+ (perror p "Mismatched start tag"
+ (vector-ref v 0) (vector-ref v* 0)))
+ (let ((contents
+ (coalesce-strings!
+ (delete-matching-items!
+ (vector->list elements)
+ (lambda (element)
+ (and (string? element)
+ (string-null? element)))))))
+ (if (null? contents)
+ ;; Preserve fact that this element
+ ;; was formed by a start/end tag pair
+ ;; rather than by an empty-element
+ ;; tag.
+ (list "")
+ contents)))
+ (let ((v* (parse-content buffer)))
+ (if (not v*)
+ (perror p "Unterminated start tag"
+ (vector-ref v 0)))
+ (if (equal? v* '#(""))
+ (perror p "Unknown content"))
+ (loop (vector-append elements v*))))))
+ '()))))))))
(define parse-start-tag ;[40,44]
(*parser
(top-level
- (bracket "start tag"
- (seq "<" parse-name)
- (match (alt (string ">") (string "/>")))
- parse-attribute-list))))
+ (with-pointer p
+ (transform (lambda (v)
+ (let ((attributes (vector-ref v 1)))
+ (process-namespace-decls attributes p)
+ (vector (intern-element-name (vector-ref v 0))
+ (map (lambda (attr)
+ (cons (intern-attribute-name (car attr))
+ (cdr attr)))
+ attributes)
+ (vector-ref v 2))))
+ (bracket "start tag"
+ (seq "<" parse-uninterned-name)
+ (match (alt (string ">") (string "/>")))
+ parse-attribute-list))))))
(define parse-end-tag ;[42]
(*parser
(top-level
(sbracket "end tag" "</" ">"
- parse-required-name
+ parse-required-element-name
S?))))
(define parse-content ;[43]
(define parse-cdata-section ;[18,19,20,21]
(bracketed-region-parser "CDATA section" "<![CDATA[" "]]>"))
-
+\f
;;;; Names
-(define parse-required-name
- (*parser (require-success "Malformed XML name" parse-name)))
+(define parse-required-element-name
+ (*parser (require-success "Malformed element name" parse-element-name)))
+
+(define parse-element-name
+ (*parser (map intern-element-name parse-uninterned-name)))
-(define parse-name ;[5]
- (*parser (map xml-intern (match match-name))))
+(define parse-attribute-name
+ (*parser (map intern-attribute-name parse-uninterned-name)))
+
+(define parse-uninterned-name ;[5]
+ (*parser
+ (encapsulate (lambda (v) v)
+ (with-pointer p
+ (seq (alt (seq (match match-name) ":")
+ (values #f))
+ (match match-name)
+ (values p))))))
+
+(define (simple-name-parser type)
+ (let ((m (string-append "Malformed " type " name")))
+ (*parser (require-success m (map xml-intern (match match-name))))))
+
+(define parse-entity-name (simple-name-parser "entity"))
+(define parse-pi-name (simple-name-parser "processing-instructions"))
+(define parse-notation-name (simple-name-parser "notation"))
(define (match-name buffer)
(and (match-utf8-char-in-alphabet buffer alphabet:name-initial)
(loop)
#t))))
-(define parse-required-name-token
- (*parser (require-success "Malformed XML name token" parse-name-token)))
-
-(define parse-name-token ;[7]
- (*parser (map xml-intern (match match-name-token))))
+(define parse-required-name-token ;[7]
+ (*parser
+ (require-success "Malformed XML name token"
+ (map xml-intern (match match-name-token)))))
(define (match-name-token buffer)
(and (match-utf8-char-in-alphabet buffer alphabet:name-subsequent)
(loop)
#t))))
\f
+(define (process-namespace-decls attributes p)
+ (set! *prefix-bindings*
+ (let loop ((attributes attributes))
+ (if (pair? attributes)
+ (let ((name (caar attributes))
+ (value (cdar attributes))
+ (tail (loop (cdr attributes)))
+ (forbidden-uri
+ (lambda (uri)
+ (perror p "Forbidden namespace URI" uri))))
+ (let ((prefix (vector-ref name 0))
+ (local-part (vector-ref name 1))
+ (uri
+ (lambda ()
+ (if (not (and (pair? value)
+ (string? (car value))
+ (null? (cdr value))))
+ (perror p "Illegal namespace URI" value))
+ (if (string-null? (car value))
+ #f ;xmlns=""
+ (car value))))
+ (guarantee-legal-uri
+ (lambda (uri)
+ (if (and uri
+ (or (string=? uri xml-uri)
+ (string=? uri xmlns-uri)))
+ (forbidden-uri uri)))))
+ (cond ((and (not prefix)
+ (string=? "xmlns" local-part))
+ (let ((uri (uri)))
+ (guarantee-legal-uri uri)
+ (cons (cons #f uri) tail)))
+ ((and prefix (string=? "xmlns" prefix))
+ (if (string=? local-part "xmlns")
+ (perror p "Illegal namespace prefix" local-part))
+ (let ((uri (uri)))
+ (if (not uri) ;legal in XML 1.1
+ (forbidden-uri ""))
+ (if (string=? local-part "xml")
+ (if (not (and uri (string=? uri xml-uri)))
+ (forbidden-uri uri))
+ (guarantee-legal-uri uri))
+ (cons (cons local-part uri) tail)))
+ (else tail))))
+ *prefix-bindings*)))
+ unspecific)
+
+(define (intern-element-name v) (intern-name v #f))
+(define (intern-attribute-name v) (intern-name v #t))
+
+(define (intern-name v attribute-name?)
+ (let ((prefix (and (vector-ref v 0) (string->symbol (vector-ref v 0))))
+ (local (string->symbol (vector-ref v 1)))
+ (p (vector-ref v 2)))
+ (%make-xml-name prefix
+ local
+ (if (or *in-dtd?* (and attribute-name? (not prefix)))
+ #f
+ (case prefix
+ ((xmlns) xmlns-uri)
+ ((xml) xml-uri)
+ (else
+ (let ((entry (assq prefix *prefix-bindings*)))
+ (if entry
+ (cdr entry)
+ (begin
+ (if prefix
+ (perror p "Unknown XML prefix:" prefix))
+ #f)))))))))
+
+(define xml-uri "http://www.w3.org/XML/1998/namespace")
+(define xmlns-uri "http://www.w3.org/2000/xmlns/")
+\f
;;;; Processing instructions
(define (pi-parser valid-content?) ;[16,17]
(if (string-ci=? (symbol-name name) "xml")
(perror p "Illegal PI name" name))
name)
- parse-required-name))
+ parse-pi-name))
parse-body))))))
(define parse-pi:misc
(*parser
(alt parse-char-reference
(with-pointer p
- (transform (lambda (v) (dereference-entity (vector-ref v 0) #f p))
+ (transform
+ (lambda (v)
+ (let ((name (vector-ref v 0)))
+ (or (dereference-entity name #f p)
+ (vector (make-xml-entity-ref name)))))
parse-entity-reference-name)))))
(define parse-reference-deferred
(define parse-entity-reference-name ;[68]
(*parser
(sbracket "entity reference" "&" ";"
- parse-required-name)))
+ parse-entity-name)))
(define parse-entity-reference-deferred
(*parser (match (seq (string "&") match-name (string ";")))))
(define parse-parameter-entity-reference-name ;[69]
(*parser
(sbracket "parameter-entity reference" "%" ";"
- parse-required-name)))
+ parse-entity-name)))
(define parse-parameter-entity-reference
(*parser
\f
;;;; Attributes
-(define parse-attribute-list
- (*parser
- (with-pointer p
- (encapsulate
- (lambda (v)
- (let ((alist (vector->list v)))
- (do ((alist alist (cdr alist)))
- ((not (pair? alist)))
- (let ((entry (assq (caar alist) (cdr alist))))
- (if entry
- (perror p "Duplicate entry in attribute list"))))
- alist))
- (seq (* parse-attribute)
- S?)))))
-
-(define parse-attribute ;[41,25]
+(define (attribute-list-parser parse-name)
+ (let ((parse-attribute (attribute-parser parse-name)))
+ (*parser
+ (with-pointer p
+ (encapsulate
+ (lambda (v)
+ (let ((alist (vector->list v)))
+ (do ((alist alist (cdr alist)))
+ ((not (pair? alist)))
+ (let ((entry (assq (caar alist) (cdr alist))))
+ (if entry
+ (perror p "Duplicate entry in attribute list"))))
+ alist))
+ (seq (* parse-attribute)
+ S?))))))
+
+(define (attribute-parser parse-name) ;[41,25]
(*parser
(encapsulate (lambda (v) (cons (vector-ref v 0) (vector-ref v 1)))
(seq S
S?
parse-attribute-value))))
+(define parse-declaration-attributes
+ (attribute-list-parser (*parser (map xml-intern (match match-name)))))
+
+(define parse-attribute-list
+ (attribute-list-parser parse-uninterned-name))
+
(define (attribute-value-parser alphabet parse-reference)
(let ((a1 (alphabet- alphabet (string->alphabet "\"")))
(a2 (alphabet- alphabet (string->alphabet "'"))))
0))
(result
(cons (get-output-string port) result)))
- (let ((value
- (vector-ref
- (dereference-entity name #t p)
- 0)))
- (if (string? value)
+ (let ((v (dereference-entity name #t p)))
+ (if v
(expand-entity-value name p
(lambda ()
- (loop (list value) result)))
- (cons value result))))))))
+ (loop v result)))
+ (cons (make-xml-entity-ref name)
+ result))))))))
(else
(write-char char port)
(normalize-string port result))))))
(define (make-parameter-entity name value)
(let ((entity (make-xml-parameter-!entity name value)))
- (if (not (or (eq? *parameter-entities* 'STOP)
- (find-parameter-entity name)))
+ (if (and (not (eq? *parameter-entities* 'STOP))
+ (not (find-parameter-entity name)))
(set! *parameter-entities* (cons entity *parameter-entities*)))
entity))
(define (make-entity name value)
(let ((entity (make-xml-!entity name value)))
- (if (not (or (eq? *general-entities* 'STOP)
- (find-entity name)))
+ (if (and (not (eq? *general-entities* 'STOP))
+ (not (find-entity name)))
(set! *general-entities* (cons entity *general-entities*)))
entity))
(define (dereference-entity name in-attribute? p)
(if (eq? *general-entities* 'STOP)
- (vector (make-xml-entity-ref name))
+ #f
(begin
(if (assq name *entity-expansion-nesting*)
(perror p "Circular entity reference" name))
(if (xml-unparsed-!entity? entity)
(perror p "Reference to unparsed entity" name))
(let ((value (xml-!entity-value entity)))
- (cond ((and (pair? value)
+ (cond ((xml-external-id? value) #f)
+ (in-attribute? value)
+ ((and (pair? value)
(string? (car value))
(null? (cdr value)))
- (if in-attribute?
- (vector (car value))
- (expand-entity-value-string name (car value) p)))
- ((xml-external-id? value)
- (begin
- (if in-attribute?
- (perror
- p
- "Reference to external entity in attribute"
- name))
- (vector (make-xml-entity-ref name))))
+ (reparse-entity-value-string name (car value) p))
(else
(if (or *standalone?* *internal-dtd?*)
(perror p "Reference to partially-defined entity"
name))
- (vector (make-xml-entity-ref name))))))
+ #f))))
(begin
(if (or *standalone?* *internal-dtd?*)
(perror p "Reference to undefined entity" name))
- (vector (make-xml-entity-ref name))))))))
+ #f))))))
-(define (expand-entity-value-string name string p)
+(define (reparse-entity-value-string name string p)
(let ((v
(expand-entity-value name p
(lambda ()
(sbracket "document-type declaration" "<!DOCTYPE" ">"
(require-success "Malformed document type"
(seq S
- parse-required-name
+ parse-required-element-name
(map (lambda (external)
(if external (set! *internal-dtd?* #f))
external)
(parse-cp ;[48]
(*parser
(alt (encapsulate encapsulate-suffix
- (seq parse-name
+ (seq parse-element-name
(? (match (char-set "?*+")))))
parse-children)))
(lambda (v) (make-xml-!element (vector-ref v 0) (vector-ref v 1)))
(sbracket "element declaration" "<!ELEMENT" ">"
S
- parse-required-name
+ parse-required-element-name
S
;;[46]
(alt (map intern (match (string "EMPTY")))
S?
"#PCDATA"
(alt (seq S? ")")
- (seq (* (seq S? "|" S? parse-required-name))
+ (seq (* (seq S? "|" S?
+ parse-required-element-name))
S?
")*")
(define parse-!attlist ;[52,53]
(*parser
(encapsulate
- (lambda (v) (make-xml-!attlist (vector-ref v 0) (vector-ref v 1)))
+ (lambda (v)
+ (let ((attlist (make-xml-!attlist (vector-ref v 0) (vector-ref v 1))))
+ (set! *attlists* (cons attlist *attlists*))
+ attlist))
(sbracket "attribute-list declaration" "<!ATTLIST" ">"
S
- parse-required-name
+ parse-required-element-name
(encapsulate vector->list
(* (encapsulate
(lambda (v)
(trim-attribute-whitespace (cadr default)))
default))))
(seq S
- parse-name
+ parse-attribute-name
S
parse-!attlist-type
S
(noise (seq (string "NOTATION") S (string "(")))
")"
S?
- parse-required-name
- (* (seq S? "|" S? parse-required-name))
+ parse-notation-name
+ (* (seq S? "|" S? parse-notation-name))
S?))
;;[59]
(encapsulate (lambda (v) (cons 'ENUMERATED (vector->list v)))
(make-parameter-entity (vector-ref v 0) (vector-ref v 1)))
(seq "%"
S
- parse-required-name
+ parse-entity-name
S
(alt parse-entity-value
parse-external-id)))
(make-unparsed-entity (vector-ref v 0)
(vector-ref v 1)
(vector-ref v 2))))
- (seq parse-required-name
+ (seq parse-entity-name
S
(alt parse-entity-value
(seq parse-external-id
- (? (seq S "NDATA" S parse-required-name)))))))
+ (? (seq S "NDATA" S
+ parse-notation-name)))))))
S?)))
(define parse-!notation ;[82,83]
(lambda (v) (make-xml-!notation (vector-ref v 0) (vector-ref v 1)))
(sbracket "notation declaration" "<!NOTATION" ">"
S
- parse-required-name
+ parse-notation-name
S
(alt parse-external-id
(encapsulate
#| -*-Scheme-*-
-$Id: xml-struct.scm,v 1.17 2003/07/25 20:38:28 cph Exp $
+$Id: xml-struct.scm,v 1.18 2003/07/30 19:44:05 cph Exp $
Copyright 2001,2002,2003 Massachusetts Institute of Technology
(declare (usual-integrations))
\f
-(define (xml-intern name)
- (if (not (and (string? name) (string-is-xml-nmtoken? name)))
- (error:wrong-type-argument name "XML nmtoken string" 'XML-INTERN))
- (string->symbol name))
+(define-record-type <combo-name>
+ (make-combo-name simple universal)
+ combo-name?
+ (simple combo-name-simple)
+ (universal combo-name-universal))
+
+(set-record-type-unparser-method! <combo-name>
+ (standard-unparser-method 'XML-NAME
+ (lambda (name port)
+ (write-char #\space port)
+ (write (combo-name-simple name) port))))
+
+(define-record-type <universal-name>
+ (make-universal-name uri local)
+ universal-name?
+ (uri universal-name-uri)
+ (local universal-name-local)
+ (combos universal-name-combos))
(define (xml-name? object)
- (and (symbol? object)
- (string-is-xml-name? (symbol-name object))))
+ (or (and (symbol? object)
+ (string-is-xml-name? (symbol-name object)))
+ (combo-name? object)))
+
+(define (guarantee-xml-name object caller)
+ (if (not (xml-name? object))
+ (error:not-xml-name object caller)))
+
+(define (error:not-xml-name object caller)
+ (error:wrong-type-argument object "an XML name" caller))
+
+(define (xml-intern string #!optional uri)
+ (guarantee-string string 'XML-INTERN)
+ (cond ((and (string-is-xml-nmtoken? string)
+ (or (default-object? uri) (not uri)))
+ (string->symbol string))
+ ((string-is-xml-name? string)
+ (guarantee-string uri 'XML-INTERN)
+ (if (not (and (fix:> (string-length uri) 0)
+ (utf8-string-valid? uri)))
+ (error:wrong-type-argument uri "an XML name URI" 'XML-INTERN))
+ (let ((simple (string->symbol string)))
+ (%%make-xml-name simple
+ uri
+ (let ((c (string-find-next-char string #\:)))
+ (if c
+ (string->symbol
+ (string-tail string (fix:+ c 1)))
+ simple)))))
+ (else
+ (error:wrong-type-argument string "an XML name string" 'XML-INTERN))))
+
+(define (%make-xml-name prefix local uri)
+ (let ((simple (if prefix (symbol-append prefix ': local) local)))
+ (if uri
+ (%%make-xml-name simple uri local)
+ simple)))
+
+(define (%%make-xml-name simple uri local)
+ (let ((uname
+ (hash-table/intern! (hash-table/intern! universal-names
+ uri
+ make-eq-hash-table)
+ local
+ (lambda ()
+ (make-universal-name uri
+ local
+ (make-eq-hash-table))))))
+ (hash-table/intern! (universal-name-combos uname)
+ simple
+ (lambda () (make-combo-name simple uname)))))
+
+(define universal-names
+ (make-string-hash-table))
+\f
+(define (xml-name-string name)
+ (cond ((xml-nmtoken? name) (symbol-name name))
+ ((combo-name? name) (symbol-name (combo-name-simple name)))
+ (else (error:not-xml-name name 'XML-NAME-STRING))))
+
+(define (xml-name-uri name)
+ (cond ((xml-nmtoken? name) #f)
+ ((combo-name? name) (universal-name-uri (combo-name-universal name)))
+ (else (error:not-xml-name name 'XML-NAME-URI))))
+
+(define (xml-name=? n1 n2)
+ (let ((lose (lambda (n) (error:not-xml-name n 'XML-NAME=?))))
+ (cond ((xml-nmtoken? n1)
+ (cond ((xml-nmtoken? n2) (eq? n1 n2))
+ ((combo-name? n2) (eq? n1 (combo-name-simple n2)))
+ (else (lose n2))))
+ ((combo-name? n1)
+ (cond ((xml-nmtoken? n2)
+ (eq? (combo-name-simple n1) n2))
+ ((combo-name? n2)
+ (eq? (combo-name-universal n1)
+ (combo-name-universal n2)))
+ (else (lose n2))))
+ (else (lose n1)))))
(define (xml-nmtoken? object)
(and (symbol? object)
(define (string-is-xml-name? string)
(let ((buffer (string->parser-buffer string)))
(and (match-utf8-char-in-alphabet buffer alphabet:name-initial)
- (let loop ()
- (if (peek-parser-buffer-char buffer)
- (and (match-utf8-char-in-alphabet buffer
- alphabet:name-subsequent)
- (loop))
- #t)))))
+ (let loop ((nc 0))
+ (cond ((match-parser-buffer-char buffer #\:)
+ (loop (fix:+ nc 1)))
+ ((peek-parser-buffer-char buffer)
+ (and (match-utf8-char-in-alphabet buffer
+ alphabet:name-subsequent)
+ (loop nc)))
+ (else (fix:<= nc 1)))))))
(define (string-is-xml-nmtoken? string)
(let ((buffer (string->parser-buffer string)))
(contents xml-content?))
(define (xml-attribute-list? object)
- (list-of-type? object xml-attribute?))
+ (and (list-of-type? object xml-attribute?)
+ (let loop ((attributes object))
+ (if (pair? attributes)
+ (and (not (there-exists? (cdr attributes)
+ (let ((name (caar attributes)))
+ (lambda (attribute)
+ (xml-name=? (car attribute) name)))))
+ (loop (cdr attributes)))
+ #t))))
(define (xml-attribute? object)
(and (pair? object)