First draft of XML namespace support.
authorChris Hanson <org/chris-hanson/cph>
Wed, 30 Jul 2003 19:44:05 +0000 (19:44 +0000)
committerChris Hanson <org/chris-hanson/cph>
Wed, 30 Jul 2003 19:44:05 +0000 (19:44 +0000)
v7/src/xml/xml-chars.scm
v7/src/xml/xml-output.scm
v7/src/xml/xml-parser.scm
v7/src/xml/xml-struct.scm
v7/src/xml/xml.pkg

index 4f637456dbc7824f3a3a9d77e6aa7318a5946f08..d7fc34ff91e1c4a3489d5281a6c70075bcb188c2 100644 (file)
@@ -1,8 +1,8 @@
 #| -*-Scheme-*-
 
-$Id: xml-chars.scm,v 1.5 2003/02/14 18:28:38 cph Exp $
+$Id: xml-chars.scm,v 1.6 2003/07/30 19:43:55 cph Exp $
 
-Copyright 2001 Massachusetts Institute of Technology
+Copyright 2001,2003 Massachusetts Institute of Technology
 
 This file is part of MIT/GNU Scheme.
 
@@ -384,7 +384,7 @@ USA.
 (define alphabet:name-initial
   (alphabet+ alphabet:xml-base-char
             alphabet:xml-ideographic
-            (string->alphabet "_:")))
+            (string->alphabet "_")))
 
 (define alphabet:name-subsequent               ;[4]
   (alphabet+ alphabet:xml-base-char
index f62e1eb63f69c51974bffbaa9a4685a8ef700a5b..14ff4123785461f8f66904a571ba60437e2e8a5b 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: xml-output.scm,v 1.21 2003/07/25 17:24:22 cph Exp $
+$Id: xml-output.scm,v 1.22 2003/07/30 19:43:59 cph Exp $
 
 Copyright 2001,2002,2003 Massachusetts Institute of Technology
 
@@ -129,7 +129,7 @@ USA.
          (for-each (lambda (content) (%write-xml content ctx))
                    contents)
          (emit-string "</" ctx)
-         (write-xml-name (xml-element-name element) ctx)
+         (write-xml-name name ctx)
          (emit-string ">" ctx))
        (emit-string " />" ctx))))
 
@@ -204,7 +204,7 @@ USA.
                 (lambda (type)
                   (handle-iterator type
                     (lambda (type)
-                      (if (symbol? type)
+                      (if (xml-name? type)
                           (write-xml-name type ctx)
                           (write-children type))))))
                (handle-iterator
@@ -412,10 +412,10 @@ USA.
     n))
 \f
 (define (write-xml-name name ctx)
-  (emit-string (symbol-name name) ctx))
+  (emit-string (xml-name-string name) ctx))
 
 (define (xml-name-columns name)
-  (utf8-string-length (symbol-name name)))
+  (utf8-string-length (xml-name-string name)))
 
 (define (write-entity-value value col ctx)
   (if (xml-external-id? value)
index 739211b86694411b1b1992f566b3da4df24fdbaf..1bac76526062f885db2c05ee539a05a10fabc393 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: xml-parser.scm,v 1.28 2003/07/27 03:38:15 cph Exp $
+$Id: xml-parser.scm,v 1.29 2003/07/30 19:44:02 cph Exp $
 
 Copyright 2001,2002,2003 Massachusetts Institute of Technology
 
@@ -23,6 +23,17 @@ USA.
 
 |#
 
+;; **** Namespace notes: ****
+;;
+;; * Namespace declarations may appear in !ATTLIST default values, and
+;;   must be processed when these declarations are in an internal DTD.
+;;
+;; * In general, default attribute values in an internal DTD must be
+;;   handled by adding appropriate attributes to the corresponding
+;;   elements.
+;;
+;; * DEREFERENCE-ENTITY seems to be expanding content refs wrong.  (???)
+
 ;;;; XML parser
 
 ;;; Comments of the form [N] refer to production rules in the XML 1.0
@@ -110,14 +121,20 @@ USA.
       (fluid-let ((*general-entities* (predefined-entities))
                  (*standalone?*)
                  (*internal-dtd?* #t)
-                 (*pi-handlers* pi-handlers))
+                 (*pi-handlers* pi-handlers)
+                 (*in-dtd?* #f)
+                 (*prefix-bindings* '())
+                 (*attlists* '()))
        (let ((declaration (one-value (parse-declaration buffer))))
          (set! *standalone?*
                (and declaration
                     (equal? (xml-declaration-standalone declaration)
                             "yes")))
          (let* ((misc-1 (one-value (parse-misc buffer)))
-                (dtd (one-value (parse-dtd buffer)))
+                (dtd
+                 (one-value
+                  (fluid-let ((*in-dtd?* #t))
+                    (parse-dtd buffer))))
                 (misc-2 (if dtd (one-value (parse-misc buffer)) '()))
                 (element
                  (or (one-value (parse-element buffer))
@@ -135,6 +152,9 @@ USA.
 (define *standalone?*)
 (define *internal-dtd?*)
 (define *pi-handlers*)
+(define *in-dtd?*)
+(define *prefix-bindings*)
+(define *attlists*)
 
 (define parse-misc                     ;[27]
   (*parser
@@ -152,7 +172,7 @@ USA.
          (lambda (v)
            (transform-declaration (vector-ref v 0) text-decl? p))
        (sbracket description "<?xml" "?>"
-         parse-attribute-list))))))
+         parse-declaration-attributes))))))
 
 (define parse-declaration              ;[23,24,32,80]
   (xml-declaration-parser "XML declaration" #f))
@@ -226,56 +246,67 @@ USA.
 
 (define (parse-element buffer)         ;[39]
   (let ((p (get-parser-buffer-pointer buffer)))
-    (let ((v (parse-start-tag buffer)))
-      (and v
-          (vector
-           (make-xml-element
-            (vector-ref v 0)
-            (vector-ref v 1)
-            (if (string=? (vector-ref v 2) ">")
-                (let loop ((elements '#()))
-                  (let ((v* (parse-end-tag buffer)))
-                    (if v*
-                        (begin
-                          (if (not (eq? (vector-ref v 0) (vector-ref v* 0)))
-                              (perror p "Mismatched start tag"
-                                      (vector-ref v 0) (vector-ref v* 0)))
-                          (let ((contents
-                                 (coalesce-strings!
-                                  (delete-matching-items!
-                                      (vector->list elements)
-                                    (lambda (element)
-                                      (and (string? element)
-                                           (string-null? element)))))))
-                            (if (null? contents)
-                                ;; Preserve fact that this element
-                                ;; was formed by a start/end tag pair
-                                ;; rather than by an empty-element
-                                ;; tag.
-                                (list "")
-                                contents)))
-                        (let ((v* (parse-content buffer)))
-                          (if (not v*)
-                              (perror p "Unterminated start tag"
-                                      (vector-ref v 0)))
-                          (if (equal? v* '#(""))
-                              (perror p "Unknown content"))
-                          (loop (vector-append elements v*))))))
-                '())))))))
+    (fluid-let ((*prefix-bindings* *prefix-bindings*))
+      (let ((v (parse-start-tag buffer)))
+       (and v
+            (vector
+             (make-xml-element
+              (vector-ref v 0)
+              (vector-ref v 1)
+              (if (string=? (vector-ref v 2) ">")
+                  (let loop ((elements '#()))
+                    (let ((v* (parse-end-tag buffer)))
+                      (if v*
+                          (begin
+                            (if (not (eq? (vector-ref v 0) (vector-ref v* 0)))
+                                (perror p "Mismatched start tag"
+                                        (vector-ref v 0) (vector-ref v* 0)))
+                            (let ((contents
+                                   (coalesce-strings!
+                                    (delete-matching-items!
+                                        (vector->list elements)
+                                      (lambda (element)
+                                        (and (string? element)
+                                             (string-null? element)))))))
+                              (if (null? contents)
+                                  ;; Preserve fact that this element
+                                  ;; was formed by a start/end tag pair
+                                  ;; rather than by an empty-element
+                                  ;; tag.
+                                  (list "")
+                                  contents)))
+                          (let ((v* (parse-content buffer)))
+                            (if (not v*)
+                                (perror p "Unterminated start tag"
+                                        (vector-ref v 0)))
+                            (if (equal? v* '#(""))
+                                (perror p "Unknown content"))
+                            (loop (vector-append elements v*))))))
+                  '()))))))))
 
 (define parse-start-tag                        ;[40,44]
   (*parser
    (top-level
-    (bracket "start tag"
-       (seq "<" parse-name)
-       (match (alt (string ">") (string "/>")))
-      parse-attribute-list))))
+    (with-pointer p
+      (transform (lambda (v)
+                  (let ((attributes (vector-ref v 1)))
+                    (process-namespace-decls attributes p)
+                    (vector (intern-element-name (vector-ref v 0))
+                            (map (lambda (attr)
+                                   (cons (intern-attribute-name (car attr))
+                                         (cdr attr)))
+                                 attributes)
+                            (vector-ref v 2))))
+       (bracket "start tag"
+           (seq "<" parse-uninterned-name)
+           (match (alt (string ">") (string "/>")))
+         parse-attribute-list))))))
 
 (define parse-end-tag                  ;[42]
   (*parser
    (top-level
     (sbracket "end tag" "</" ">"
-      parse-required-name
+      parse-required-element-name
       S?))))
 
 (define parse-content                  ;[43]
@@ -325,14 +356,34 @@ USA.
 
 (define parse-cdata-section            ;[18,19,20,21]
   (bracketed-region-parser "CDATA section" "<![CDATA[" "]]>"))
-
+\f
 ;;;; Names
 
-(define parse-required-name
-  (*parser (require-success "Malformed XML name" parse-name)))
+(define parse-required-element-name
+  (*parser (require-success "Malformed element name" parse-element-name)))
+
+(define parse-element-name
+  (*parser (map intern-element-name parse-uninterned-name)))
 
-(define parse-name                     ;[5]
-  (*parser (map xml-intern (match match-name))))
+(define parse-attribute-name
+  (*parser (map intern-attribute-name parse-uninterned-name)))
+
+(define parse-uninterned-name          ;[5]
+  (*parser
+   (encapsulate (lambda (v) v)
+     (with-pointer p
+       (seq (alt (seq (match match-name) ":")
+                (values #f))
+           (match match-name)
+           (values p))))))
+
+(define (simple-name-parser type)
+  (let ((m (string-append "Malformed " type " name")))
+    (*parser (require-success m (map xml-intern (match match-name))))))
+
+(define parse-entity-name (simple-name-parser "entity"))
+(define parse-pi-name (simple-name-parser "processing-instructions"))
+(define parse-notation-name (simple-name-parser "notation"))
 
 (define (match-name buffer)
   (and (match-utf8-char-in-alphabet buffer alphabet:name-initial)
@@ -341,11 +392,10 @@ USA.
             (loop)
             #t))))
 
-(define parse-required-name-token
-  (*parser (require-success "Malformed XML name token" parse-name-token)))
-
-(define parse-name-token               ;[7]
-  (*parser (map xml-intern (match match-name-token))))
+(define parse-required-name-token      ;[7]
+  (*parser
+   (require-success "Malformed XML name token"
+     (map xml-intern (match match-name-token)))))
 
 (define (match-name-token buffer)
   (and (match-utf8-char-in-alphabet buffer alphabet:name-subsequent)
@@ -354,6 +404,79 @@ USA.
             (loop)
             #t))))
 \f
+(define (process-namespace-decls attributes p)
+  (set! *prefix-bindings*
+       (let loop ((attributes attributes))
+         (if (pair? attributes)
+             (let ((name (caar attributes))
+                   (value (cdar attributes))
+                   (tail (loop (cdr attributes)))
+                   (forbidden-uri
+                    (lambda (uri)
+                      (perror p "Forbidden namespace URI" uri))))
+               (let ((prefix (vector-ref name 0))
+                     (local-part (vector-ref name 1))
+                     (uri
+                      (lambda ()
+                        (if (not (and (pair? value)
+                                      (string? (car value))
+                                      (null? (cdr value))))
+                            (perror p "Illegal namespace URI" value))
+                        (if (string-null? (car value))
+                            #f         ;xmlns=""
+                            (car value))))
+                     (guarantee-legal-uri
+                      (lambda (uri)
+                        (if (and uri
+                                 (or (string=? uri xml-uri)
+                                     (string=? uri xmlns-uri)))
+                            (forbidden-uri uri)))))
+                 (cond ((and (not prefix)
+                             (string=? "xmlns" local-part))
+                        (let ((uri (uri)))
+                          (guarantee-legal-uri uri)
+                          (cons (cons #f uri) tail)))
+                       ((and prefix (string=? "xmlns" prefix))
+                        (if (string=? local-part "xmlns")
+                            (perror p "Illegal namespace prefix" local-part))
+                        (let ((uri (uri)))
+                          (if (not uri) ;legal in XML 1.1
+                              (forbidden-uri ""))
+                          (if (string=? local-part "xml")
+                              (if (not (and uri (string=? uri xml-uri)))
+                                  (forbidden-uri uri))
+                              (guarantee-legal-uri uri))
+                          (cons (cons local-part uri) tail)))
+                       (else tail))))
+             *prefix-bindings*)))
+  unspecific)
+
+(define (intern-element-name v) (intern-name v #f))
+(define (intern-attribute-name v) (intern-name v #t))
+
+(define (intern-name v attribute-name?)
+  (let ((prefix (and (vector-ref v 0) (string->symbol (vector-ref v 0))))
+       (local (string->symbol (vector-ref v 1)))
+       (p (vector-ref v 2)))
+    (%make-xml-name prefix
+                   local
+                   (if (or *in-dtd?* (and attribute-name? (not prefix)))
+                       #f
+                       (case prefix
+                         ((xmlns) xmlns-uri)
+                         ((xml) xml-uri)
+                         (else
+                          (let ((entry (assq prefix *prefix-bindings*)))
+                            (if entry
+                                (cdr entry)
+                                (begin
+                                  (if prefix
+                                      (perror p "Unknown XML prefix:" prefix))
+                                  #f)))))))))
+
+(define xml-uri "http://www.w3.org/XML/1998/namespace")
+(define xmlns-uri "http://www.w3.org/2000/xmlns/")
+\f
 ;;;; Processing instructions
 
 (define (pi-parser valid-content?) ;[16,17]
@@ -380,7 +503,7 @@ USA.
                    (if (string-ci=? (symbol-name name) "xml")
                        (perror p "Illegal PI name" name))
                    name)
-                 parse-required-name))
+                 parse-pi-name))
           parse-body))))))
 
 (define parse-pi:misc
@@ -441,7 +564,11 @@ USA.
   (*parser
    (alt parse-char-reference
        (with-pointer p
-         (transform (lambda (v) (dereference-entity (vector-ref v 0) #f p))
+         (transform
+             (lambda (v)
+               (let ((name (vector-ref v 0)))
+                 (or (dereference-entity name #f p)
+                     (vector (make-xml-entity-ref name)))))
            parse-entity-reference-name)))))
 
 (define parse-reference-deferred
@@ -457,7 +584,7 @@ USA.
 (define parse-entity-reference-name    ;[68]
   (*parser
    (sbracket "entity reference" "&" ";"
-     parse-required-name)))
+     parse-entity-name)))
 
 (define parse-entity-reference-deferred
   (*parser (match (seq (string "&") match-name (string ";")))))
@@ -465,7 +592,7 @@ USA.
 (define parse-parameter-entity-reference-name ;[69]
   (*parser
    (sbracket "parameter-entity reference" "%" ";"
-     parse-required-name)))
+     parse-entity-name)))
 
 (define parse-parameter-entity-reference
   (*parser
@@ -474,22 +601,23 @@ USA.
 \f
 ;;;; Attributes
 
-(define parse-attribute-list
-  (*parser
-   (with-pointer p
-     (encapsulate
-        (lambda (v)
-          (let ((alist (vector->list v)))
-            (do ((alist alist (cdr alist)))
-                ((not (pair? alist)))
-              (let ((entry (assq (caar alist) (cdr alist))))
-                (if entry
-                    (perror p "Duplicate entry in attribute list"))))
-            alist))
-       (seq (* parse-attribute)
-           S?)))))
-
-(define parse-attribute                        ;[41,25]
+(define (attribute-list-parser parse-name)
+  (let ((parse-attribute (attribute-parser parse-name)))
+    (*parser
+     (with-pointer p
+       (encapsulate
+          (lambda (v)
+            (let ((alist (vector->list v)))
+              (do ((alist alist (cdr alist)))
+                  ((not (pair? alist)))
+                (let ((entry (assq (caar alist) (cdr alist))))
+                  (if entry
+                      (perror p "Duplicate entry in attribute list"))))
+              alist))
+        (seq (* parse-attribute)
+             S?))))))
+
+(define (attribute-parser parse-name)  ;[41,25]
   (*parser
    (encapsulate (lambda (v) (cons (vector-ref v 0) (vector-ref v 1)))
      (seq S
@@ -499,6 +627,12 @@ USA.
          S?
          parse-attribute-value))))
 
+(define parse-declaration-attributes
+  (attribute-list-parser (*parser (map xml-intern (match match-name)))))
+
+(define parse-attribute-list
+  (attribute-list-parser parse-uninterned-name))
+
 (define (attribute-value-parser alphabet parse-reference)
   (let ((a1 (alphabet- alphabet (string->alphabet "\"")))
        (a2 (alphabet- alphabet (string->alphabet "'"))))
@@ -585,15 +719,13 @@ USA.
                                        0))
                                      (result
                                       (cons (get-output-string port) result)))
-                                 (let ((value
-                                        (vector-ref
-                                         (dereference-entity name #t p)
-                                         0)))
-                                   (if (string? value)
+                                 (let ((v (dereference-entity name #t p)))
+                                   (if v
                                        (expand-entity-value name p
                                          (lambda ()
-                                           (loop (list value) result)))
-                                       (cons value result))))))))
+                                           (loop v result)))
+                                       (cons (make-xml-entity-ref name)
+                                             result))))))))
                        (else
                         (write-char char port)
                         (normalize-string port result))))))
@@ -662,15 +794,15 @@ USA.
 
 (define (make-parameter-entity name value)
   (let ((entity (make-xml-parameter-!entity name value)))
-    (if (not (or (eq? *parameter-entities* 'STOP)
-                (find-parameter-entity name)))
+    (if (and (not (eq? *parameter-entities* 'STOP))
+            (not (find-parameter-entity name)))
        (set! *parameter-entities* (cons entity *parameter-entities*)))
     entity))
 
 (define (make-entity name value)
   (let ((entity (make-xml-!entity name value)))
-    (if (not (or (eq? *general-entities* 'STOP)
-                (find-entity name)))
+    (if (and (not (eq? *general-entities* 'STOP))
+            (not (find-entity name)))
        (set! *general-entities* (cons entity *general-entities*)))
     entity))
 
@@ -709,7 +841,7 @@ USA.
 
 (define (dereference-entity name in-attribute? p)
   (if (eq? *general-entities* 'STOP)
-      (vector (make-xml-entity-ref name))
+      #f
       (begin
        (if (assq name *entity-expansion-nesting*)
            (perror p "Circular entity reference" name))
@@ -719,31 +851,23 @@ USA.
                (if (xml-unparsed-!entity? entity)
                    (perror p "Reference to unparsed entity" name))
                (let ((value (xml-!entity-value entity)))
-                 (cond ((and (pair? value)
+                 (cond ((xml-external-id? value) #f)
+                       (in-attribute? value)
+                       ((and (pair? value)
                              (string? (car value))
                              (null? (cdr value)))
-                        (if in-attribute?
-                            (vector (car value))
-                            (expand-entity-value-string name (car value) p)))
-                       ((xml-external-id? value)
-                        (begin
-                          (if in-attribute?
-                              (perror
-                               p
-                               "Reference to external entity in attribute"
-                               name))
-                          (vector (make-xml-entity-ref name))))
+                        (reparse-entity-value-string name (car value) p))
                        (else
                         (if (or *standalone?* *internal-dtd?*)
                             (perror p "Reference to partially-defined entity"
                                     name))
-                        (vector (make-xml-entity-ref name))))))
+                        #f))))
              (begin
                (if (or *standalone?* *internal-dtd?*)
                    (perror p "Reference to undefined entity" name))
-               (vector (make-xml-entity-ref name))))))))
+               #f))))))
 
-(define (expand-entity-value-string name string p)
+(define (reparse-entity-value-string name string p)
   (let ((v
         (expand-entity-value name p
           (lambda ()
@@ -796,7 +920,7 @@ USA.
       (sbracket "document-type declaration" "<!DOCTYPE" ">"
        (require-success "Malformed document type"
          (seq S
-              parse-required-name
+              parse-required-element-name
               (map (lambda (external)
                      (if external (set! *internal-dtd?* #f))
                      external)
@@ -861,7 +985,7 @@ USA.
        (parse-cp                       ;[48]
         (*parser
          (alt (encapsulate encapsulate-suffix
-                (seq parse-name
+                (seq parse-element-name
                      (? (match (char-set "?*+")))))
               parse-children)))
 
@@ -877,7 +1001,7 @@ USA.
         (lambda (v) (make-xml-!element (vector-ref v 0) (vector-ref v 1)))
        (sbracket "element declaration" "<!ELEMENT" ">"
         S
-        parse-required-name
+        parse-required-element-name
         S
         ;;[46]
         (alt (map intern (match (string "EMPTY")))
@@ -889,7 +1013,8 @@ USA.
                       S?
                       "#PCDATA"
                       (alt (seq S? ")")
-                           (seq (* (seq S? "|" S? parse-required-name))
+                           (seq (* (seq S? "|" S?
+                                        parse-required-element-name))
                                 S?
                                 ")*")
 
@@ -902,10 +1027,13 @@ USA.
 (define parse-!attlist                 ;[52,53]
   (*parser
    (encapsulate
-       (lambda (v) (make-xml-!attlist (vector-ref v 0) (vector-ref v 1)))
+       (lambda (v)
+        (let ((attlist (make-xml-!attlist (vector-ref v 0) (vector-ref v 1))))
+          (set! *attlists* (cons attlist *attlists*))
+          attlist))
      (sbracket "attribute-list declaration" "<!ATTLIST" ">"
        S
-       parse-required-name
+       parse-required-element-name
        (encapsulate vector->list
         (* (encapsulate
                (lambda (v)
@@ -919,7 +1047,7 @@ USA.
                                    (trim-attribute-whitespace (cadr default)))
                              default))))
              (seq S
-                  parse-name
+                  parse-attribute-name
                   S
                   parse-!attlist-type
                   S
@@ -944,8 +1072,8 @@ USA.
              (noise (seq (string "NOTATION") S (string "(")))
              ")"
            S?
-           parse-required-name
-           (* (seq S? "|" S? parse-required-name))
+           parse-notation-name
+           (* (seq S? "|" S? parse-notation-name))
            S?))
        ;;[59]
        (encapsulate (lambda (v) (cons 'ENUMERATED (vector->list v)))
@@ -978,7 +1106,7 @@ USA.
                (make-parameter-entity (vector-ref v 0) (vector-ref v 1)))
            (seq "%"
                 S
-                parse-required-name
+                parse-entity-name
                 S
                 (alt parse-entity-value
                      parse-external-id)))
@@ -989,11 +1117,12 @@ USA.
                    (make-unparsed-entity (vector-ref v 0)
                                          (vector-ref v 1)
                                          (vector-ref v 2))))
-           (seq parse-required-name
+           (seq parse-entity-name
                 S
                 (alt parse-entity-value
                      (seq parse-external-id
-                          (? (seq S "NDATA" S parse-required-name)))))))
+                          (? (seq S "NDATA" S
+                                  parse-notation-name)))))))
      S?)))
 
 (define parse-!notation                        ;[82,83]
@@ -1002,7 +1131,7 @@ USA.
        (lambda (v) (make-xml-!notation (vector-ref v 0) (vector-ref v 1)))
      (sbracket "notation declaration" "<!NOTATION" ">"
        S
-       parse-required-name
+       parse-notation-name
        S
        (alt parse-external-id
            (encapsulate
index 737a039b4cb8454fa4bacbae3f6cb06d9d0d4d0d..8402aafd4f598197b856a6327e76b3d42c281e85 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: xml-struct.scm,v 1.17 2003/07/25 20:38:28 cph Exp $
+$Id: xml-struct.scm,v 1.18 2003/07/30 19:44:05 cph Exp $
 
 Copyright 2001,2002,2003 Massachusetts Institute of Technology
 
@@ -27,14 +27,105 @@ USA.
 
 (declare (usual-integrations))
 \f
-(define (xml-intern name)
-  (if (not (and (string? name) (string-is-xml-nmtoken? name)))
-      (error:wrong-type-argument name "XML nmtoken string" 'XML-INTERN))
-  (string->symbol name))
+(define-record-type <combo-name>
+    (make-combo-name simple universal)
+    combo-name?
+  (simple combo-name-simple)
+  (universal combo-name-universal))
+
+(set-record-type-unparser-method! <combo-name>
+  (standard-unparser-method 'XML-NAME
+    (lambda (name port)
+      (write-char #\space port)
+      (write (combo-name-simple name) port))))
+
+(define-record-type <universal-name>
+    (make-universal-name uri local)
+    universal-name?
+  (uri universal-name-uri)
+  (local universal-name-local)
+  (combos universal-name-combos))
 
 (define (xml-name? object)
-  (and (symbol? object)
-       (string-is-xml-name? (symbol-name object))))
+  (or (and (symbol? object)
+          (string-is-xml-name? (symbol-name object)))
+      (combo-name? object)))
+
+(define (guarantee-xml-name object caller)
+  (if (not (xml-name? object))
+      (error:not-xml-name object caller)))
+
+(define (error:not-xml-name object caller)
+  (error:wrong-type-argument object "an XML name" caller))
+
+(define (xml-intern string #!optional uri)
+  (guarantee-string string 'XML-INTERN)
+  (cond ((and (string-is-xml-nmtoken? string)
+             (or (default-object? uri) (not uri)))
+        (string->symbol string))
+       ((string-is-xml-name? string)
+        (guarantee-string uri 'XML-INTERN)
+        (if (not (and (fix:> (string-length uri) 0)
+                      (utf8-string-valid? uri)))
+            (error:wrong-type-argument uri "an XML name URI" 'XML-INTERN))
+        (let ((simple (string->symbol string)))
+          (%%make-xml-name simple
+                           uri
+                           (let ((c (string-find-next-char string #\:)))
+                             (if c
+                                 (string->symbol
+                                  (string-tail string (fix:+ c 1)))
+                                 simple)))))
+       (else
+        (error:wrong-type-argument string "an XML name string" 'XML-INTERN))))
+
+(define (%make-xml-name prefix local uri)
+  (let ((simple (if prefix (symbol-append prefix ': local) local)))
+    (if uri
+       (%%make-xml-name simple uri local)
+       simple)))
+
+(define (%%make-xml-name simple uri local)
+  (let ((uname
+        (hash-table/intern! (hash-table/intern! universal-names
+                                                uri
+                                                make-eq-hash-table)
+                            local
+                            (lambda ()
+                              (make-universal-name uri
+                                                   local
+                                                   (make-eq-hash-table))))))
+    (hash-table/intern! (universal-name-combos uname)
+                       simple
+                       (lambda () (make-combo-name simple uname)))))
+
+(define universal-names
+  (make-string-hash-table))
+\f
+(define (xml-name-string name)
+  (cond ((xml-nmtoken? name) (symbol-name name))
+       ((combo-name? name) (symbol-name (combo-name-simple name)))
+       (else (error:not-xml-name name 'XML-NAME-STRING))))
+
+(define (xml-name-uri name)
+  (cond ((xml-nmtoken? name) #f)
+       ((combo-name? name) (universal-name-uri (combo-name-universal name)))
+       (else (error:not-xml-name name 'XML-NAME-URI))))
+
+(define (xml-name=? n1 n2)
+  (let ((lose (lambda (n) (error:not-xml-name n 'XML-NAME=?))))
+    (cond ((xml-nmtoken? n1)
+          (cond ((xml-nmtoken? n2) (eq? n1 n2))
+                ((combo-name? n2) (eq? n1 (combo-name-simple n2)))
+                (else (lose n2))))
+         ((combo-name? n1)
+          (cond ((xml-nmtoken? n2)
+                 (eq? (combo-name-simple n1) n2))
+                ((combo-name? n2)
+                 (eq? (combo-name-universal n1)
+                      (combo-name-universal n2)))
+                (else (lose n2))))
+         (else (lose n1)))))
 
 (define (xml-nmtoken? object)
   (and (symbol? object)
@@ -43,12 +134,14 @@ USA.
 (define (string-is-xml-name? string)
   (let ((buffer (string->parser-buffer string)))
     (and (match-utf8-char-in-alphabet buffer alphabet:name-initial)
-        (let loop ()
-          (if (peek-parser-buffer-char buffer)
-              (and (match-utf8-char-in-alphabet buffer
-                                                alphabet:name-subsequent)
-                   (loop))
-              #t)))))
+        (let loop ((nc 0))
+          (cond ((match-parser-buffer-char buffer #\:)
+                 (loop (fix:+ nc 1)))
+                ((peek-parser-buffer-char buffer)
+                 (and (match-utf8-char-in-alphabet buffer
+                                                   alphabet:name-subsequent)
+                      (loop nc)))
+                (else (fix:<= nc 1)))))))
 
 (define (string-is-xml-nmtoken? string)
   (let ((buffer (string->parser-buffer string)))
@@ -170,7 +263,15 @@ USA.
   (contents xml-content?))
 
 (define (xml-attribute-list? object)
-  (list-of-type? object xml-attribute?))
+  (and (list-of-type? object xml-attribute?)
+       (let loop ((attributes object))
+        (if (pair? attributes)
+            (and (not (there-exists? (cdr attributes)
+                        (let ((name (caar attributes)))
+                          (lambda (attribute)
+                            (xml-name=? (car attribute) name)))))
+                 (loop (cdr attributes)))
+            #t))))
 
 (define (xml-attribute? object)
   (and (pair? object)
index ec5004dea30d8865aec54332ea22b8380353234a..7f50c8cff2c43f2107889d093c04f0209a74390f 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: xml.pkg,v 1.21 2003/07/13 03:45:01 cph Exp $
+$Id: xml.pkg,v 1.22 2003/07/30 19:43:52 cph Exp $
 
 Copyright 2001,2002,2003 Massachusetts Institute of Technology
 
@@ -51,6 +51,7 @@ USA.
          <xml-parameter-entity-ref>
          <xml-processing-instructions>
          <xml-unparsed-!entity>
+         guarantee-xml-name
          make-xml-!attlist
          make-xml-!element
          make-xml-!entity
@@ -147,6 +148,9 @@ USA.
          xml-external-id-uri
          xml-external-id?
          xml-intern
+         xml-name-string
+         xml-name-uri
+         xml-name=?
          xml-name?
          xml-nmtoken?
          xml-parameter-!entity-name
@@ -161,7 +165,9 @@ USA.
          xml-unparsed-!entity-name
          xml-unparsed-!entity-notation
          xml-unparsed-!entity?
-         xml-whitespace-string?))
+         xml-whitespace-string?)
+  (export (runtime xml parser)
+         %make-xml-name))
 
 (define-package (runtime xml parser)
   (files "xml-chars" "xml-parser")