Major rewrite, primarily to eliminate XML-UNINTERPRETED, replacing it
authorChris Hanson <org/chris-hanson/cph>
Sat, 1 Mar 2003 16:53:39 +0000 (16:53 +0000)
committerChris Hanson <org/chris-hanson/cph>
Sat, 1 Mar 2003 16:53:39 +0000 (16:53 +0000)
with XML-ENTITY-REF and XML-PARAMETER-ENTITY-REF.  Also add careful
type checking to data structures, so that argument structure is
verified.

v7/src/xml/test-parser.scm
v7/src/xml/xml-output.scm
v7/src/xml/xml-parser.scm
v7/src/xml/xml-struct.scm
v7/src/xml/xml.pkg

index c982aa4fb1f55bf734acb5ce0a8800fd4123da29..7ae6d52d3de1dda220eb967c29d8a68e3179c7e1 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: test-parser.scm,v 1.9 2003/02/14 18:28:38 cph Exp $
+$Id: test-parser.scm,v 1.10 2003/03/01 16:52:10 cph Exp $
 
 Copyright 2001 Massachusetts Institute of Technology
 
@@ -23,53 +23,64 @@ USA.
 
 |#
 
-(define (test-parser pathname)
-  (call-with-input-file pathname
-    (lambda (port)
-      (parse-xml-document (input-port->parser-buffer port)))))
+(define (run-xml-tests #!optional root)
+  (let ((root
+        (merge-pathnames "xmlconf/xmltest/"
+                         (if (default-object? root)
+                             "~/xml/"
+                             (pathname-as-directory root)))))
+    (for-each (lambda (dir)
+               (newline)
+               (write-string ";")
+               (write-string dir)
+               (newline)
+               (test-directory (merge-pathnames dir root)))
+             '("valid/sa" "valid/ext-sa" "valid/not-sa"
+                          "invalid"
+                          "not-wf/sa" "not-wf/ext-sa" "not-wf/not-sa"))))
 
 (define (test-directory directory)
   (map (lambda (pathname)
         (write-string ";")
         (write-string (file-namestring pathname))
         (write-string ":\t")
-        (let ((v (ignore-errors (lambda () (test-parser pathname)))))
+        (let ((v (ignore-errors (lambda () (read-xml-file pathname)))))
           (cond ((not v)
                  (write-string "No match."))
                 ((condition? v)
                  (write-condition-report v (current-output-port)))
                 (else
-                 (write-string "Parsed: ")
-                 (write v)))
+                 (let ((s (ignore-errors (lambda () (xml->string v)))))
+                   (if (condition? s)
+                       (begin
+                         (write-string "Can't write: ")
+                         (write-condition-report s (current-output-port)))
+                       (let ((x (ignore-errors (lambda () (string->xml s)))))
+                         (if (condition? x)
+                             (begin
+                               (write-string "Can't re-read: ")
+                               (write-condition-report x
+                                                       (current-output-port)))
+                             (begin
+                               (write-string "Parsed: ")
+                               (write v))))))))
           (newline)
           v))
        (directory-read
        (merge-pathnames "*.xml" (pathname-as-directory directory)))))
 
-(define (run-xml-tests root)
-  (let ((root
-        (merge-pathnames "xmlconf/xmltest/"
-                         (pathname-as-directory root))))
-    (for-each (lambda (dir)
-               (newline)
-               (write-string ";")
-               (write-string dir)
-               (newline)
-               (test-directory (merge-pathnames dir root)))
-             '("valid/sa" "valid/ext-sa" "valid/not-sa"
-                          "invalid"
-                          "not-wf/sa" "not-wf/ext-sa" "not-wf/not-sa"))))
-
-(define (run-output-tests root output)
+(define (run-output-tests output #!optional root)
   (let ((root
         (merge-pathnames "xmlconf/xmltest/"
-                         (pathname-as-directory root)))
+                         (if (default-object? root)
+                             "~/xml/"
+                             (pathname-as-directory root))))
        (output (pathname-as-directory output)))
     (for-each (lambda (pathname)
                (write-string ";")
                (write-string (file-namestring pathname))
                (write-string ":\t")
-               (let ((v (ignore-errors (lambda () (test-parser pathname)))))
+               (let ((v (ignore-errors (lambda () (read-xml-file pathname)))))
                  (cond ((not v)
                         (write-string "No match.")
                         (newline))
index dc71b9960807d0f5d28ee5ccba180bee320090ed..52c1e172cb733ac1c3e535c8897b75d36f9b89e9 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: xml-output.scm,v 1.14 2003/02/14 18:28:38 cph Exp $
+$Id: xml-output.scm,v 1.15 2003/03/01 16:52:53 cph Exp $
 
 Copyright 2001,2002,2003 Massachusetts Institute of Technology
 
@@ -53,20 +53,19 @@ USA.
            (xml-document-misc-3 document)))
 
 (define-method write-xml ((declaration xml-declaration-rtd) port)
-  (write-string "<?xml" port)
-  (write-xml-attributes
-   (append (list (cons (xml-intern "version")
-                      (xml-declaration-version declaration)))
-          (if (xml-declaration-encoding declaration)
-              (list (cons (xml-intern "encoding")
-                          (xml-declaration-encoding declaration)))
-              '())
-          (if (xml-declaration-standalone declaration)
-              (list (cons (xml-intern "standalone")
-                          (xml-declaration-standalone declaration)))
-              '()))
-   2
-   port)
+  (write-string "<?xml version=\"" port)
+  (write-string (xml-declaration-version declaration) port)
+  (write-string "\"" port)
+  (if (xml-declaration-encoding declaration)
+      (begin
+       (write-string " encoding=\"" port)
+       (write-string (xml-declaration-encoding declaration) port)
+       (write-string "\"" port)))
+  (if (xml-declaration-standalone declaration)
+      (begin
+       (write-string " standalone=\"" port)
+       (write-string (xml-declaration-standalone declaration) port)
+       (write-string "\"" port)))
   (write-string "?>" port))
 
 (define-method write-xml ((element xml-element-rtd) port)
@@ -97,12 +96,6 @@ USA.
   (write-xml-name (xml-processing-instructions-name pi) port)
   (write-string (xml-processing-instructions-text pi) port)
   (write-string "?>" port))
-
-(define-method write-xml ((element xml-uninterpreted-rtd) port)
-  ;; **** There's a quoting problem here -- char data that gets
-  ;; bundled into this must be quoted prior to combination with other
-  ;; elements.
-  (write-string (xml-uninterpreted-text element) port))
 \f
 (define-method write-xml ((dtd xml-dtd-rtd) port)
   ;;root external internal
@@ -131,7 +124,7 @@ USA.
   (write-string " " port)
   (let ((type (xml-!element-content-type decl)))
     (cond ((symbol? type)
-          (write-xml-name type port))
+          (write-string (string-upcase (symbol-name type)) port))
          ((and (pair? type) (eq? (car type) 'MIX))
           (write-string "(#PCDATA" port)
           (if (pair? (cdr type))
@@ -193,7 +186,7 @@ USA.
           (write-string " " port)
           (let ((type (cadr definition)))
             (cond ((symbol? type)
-                   (write-xml-name type port))
+                   (write-string (string-upcase (symbol-name type)) port))
                   ((and (pair? type) (eq? (car type) 'NOTATION))
                    (write-string "NOTATION (" port)
                    (if (pair? (cdr type))
@@ -218,14 +211,16 @@ USA.
                    (error "Malformed !ATTLIST type:" type))))
           (write-string " " port)
           (let ((default (caddr definition)))
-            (cond ((symbol? default)
-                   (write-xml-name default port))
-                  ((and (pair? default) (eq? (car default) 'DEFAULT))
-                   (write-xml-string (cadr default) port))
-                  ((and (pair? default) (symbol? (car default)))
-                   (write-xml-name (car default) port)
+            (cond ((eq? default 'REQUIRED)
+                   (write-string "#REQUIRED" port))
+                  ((eq? default 'IMPLIED)
+                   (write-string "#IMPLIED" port))
+                  ((and (pair? default) (eq? (car default) 'FIXED))
+                   (write-string "#FIXED" port)
                    (write-string " " port)
-                   (write-xml-string (cadr default) port))
+                   (write-xml-attribute-value (cdr default) port))
+                  ((and (pair? default) (eq? (car default) 'DEFAULT))
+                   (write-xml-attribute-value (cdr default) port))
                   (else
                    (error "Malformed !ATTLIST default:" default)))))))
     (if (pair? definitions)
@@ -245,9 +240,7 @@ USA.
   (let ((indent (output-port/column port)))
     (write-xml-name (xml-!entity-name decl) port)
     (write-string " " port)
-    (if (xml-external-id? (xml-!entity-value decl))
-       (write-xml-external-id (xml-!entity-value decl) indent port)
-       (write-entity-value (xml-!entity-value decl) port))
+    (write-entity-value (xml-!entity-value decl) indent port)
     (write-string ">" port)))
 
 (define-method write-xml ((decl xml-unparsed-!entity-rtd) port)
@@ -266,9 +259,7 @@ USA.
     (write-string "% " port)
     (write-xml-name (xml-parameter-!entity-name decl) port)
     (write-string " " port)
-    (if (xml-external-id? (xml-parameter-!entity-value decl))
-       (write-xml-external-id (xml-parameter-!entity-value decl) indent port)
-       (write-entity-value (xml-parameter-!entity-value decl) port))
+    (write-entity-value (xml-parameter-!entity-value decl) indent port)
     (write-string ">" port)))
 
 (define-method write-xml ((decl xml-!notation-rtd) port)
@@ -280,26 +271,25 @@ USA.
     (write-string ">" port)))
 
 (define-method write-xml ((string <string>) port)
-  (let ((end (string-length string)))
-    (do ((i 0 (fix:+ i 1)))
-       ((fix:= i end))
-      (let ((char (string-ref string i)))
-       (cond ((char=? char #\<)
-              (write-string "&lt;" port))
-             ((char=? char #\&)
-              (write-string "&amp;" port))
-             (else
-              (write-char char port)))))))
-\f
-(define (write-xml-name name port)
-  (write-string (symbol-name name) port))
+  (write-escaped-string string
+                       '((#\< . "&lt;")
+                         (#\& . "&amp;"))
+                       port))
 
-(define (xml-name-columns name)
-  (string-length (symbol-name name)))
+(define-method write-xml ((ref xml-entity-ref-rtd) port)
+  (write-string "&" port)
+  (write-xml-name (xml-entity-ref-name ref) port)
+  (write-string ";" port))
 
+(define-method write-xml ((ref xml-parameter-entity-ref-rtd) port)
+  (write-string "%" port)
+  (write-xml-name (xml-parameter-entity-ref-name ref) port)
+  (write-string ";" port))
+\f
 (define (write-xml-attributes attributes suffix-cols port)
   (let ((start-col (output-port/column port)))
-    (if (and (pair? attributes)
+    (if (and start-col
+            (pair? attributes)
             (pair? (cdr attributes))
             (>= (+ start-col
                    (xml-attributes-columns attributes)
@@ -326,79 +316,122 @@ USA.
 
 (define (write-xml-attribute attribute port)
   (write-xml-name (car attribute) port)
-  (write-string "=" port)
-  (write-xml-string (cdr attribute) port))
+  (write-char #\= port)
+  (write-xml-attribute-value (cdr attribute) port))
+
+(define (write-xml-attribute-value value port)
+  (write-char #\" port)
+  (for-each (lambda (item)
+             (if (string? item)
+                 (write-xml-string item port)
+                 (write-xml item port)))
+           value)
+  (write-char #\" port))
 
 (define (xml-attribute-columns attribute)
   (+ (xml-name-columns (car attribute))
      1
-     (xml-string-columns (cdr attribute))))
+     (let loop ((items (cdr attribute)) (n 2))
+       (if (pair? items)
+          (loop (cdr items)
+                (+ n
+                   (if (string? (car items))
+                       (xml-string-columns (car items))
+                       (+ (xml-name-columns (xml-entity-ref-name (car items)))
+                          2))))
+          n))))
 
 (define (write-xml-string string port)
-  (let ((quote-char (if (string-find-next-char string #\") #\' #\"))
-       (end (string-length string)))
-    (write-char quote-char port)
-    (do ((i 0 (fix:+ i 1)))
-       ((fix:= i end))
-      (let ((char (string-ref string i)))
-       (cond ((char=? char quote-char)
-              (write-string (if (char=? char #\") "&quot;" "&apos;") port))
-             ((char=? char #\<)
-              (write-string "&lt;" port))
-             ((char=? char #\&)
-              (write-string "&amp;" port))
-             (else
-              (write-char char port)))))
-    (write-char quote-char port)))
+  (write-escaped-string string
+                       '((#\" . "&quot;")
+                         (#\< . "&lt;")
+                         (#\& . "&amp;"))
+                       port))
 
 (define (xml-string-columns string)
-  (let ((quote-char (if (string-find-next-char string #\") #\' #\"))
-       (end (string-length string)))
-    (let loop ((i 0) (n-cols 2))
-      (if (fix:= i end)
-         n-cols
-         (loop (fix:+ i 1)
-               (+ n-cols
-                  (let ((char (string-ref string i)))
-                    (cond ((char=? char quote-char) 6)
-                          ((char=? char #\<) 4)
-                          ((char=? char #\&) 5)
-                          (else 1)))))))))
+  (let ((n (utf8-string-length string)))
+    (for-each-utf8-char string
+      (lambda (char)
+       (set! n
+             (fix:+ n
+                    (case char
+                      ((#\") 5)
+                      ((#\<) 3)
+                      ((#\&) 4)
+                      (else 0))))
+       unspecific))
+    n))
 \f
-(define (write-entity-value string port)
-  (let ((quote-char (if (string-find-next-char string #\") #\' #\"))
-       (end (string-length string)))
-    (write-char quote-char port)
-    (do ((i 0 (fix:+ i 1)))
-       ((fix:= i end))
-      (let ((char (string-ref string i)))
-       (cond ((char=? char quote-char)
-              (write-string (if (char=? char #\") "&quot;" "&apos;") port))
-             ((char=? char #\%)
-              (write-string "&#37;" port))
-             (else
-              (write-char char port)))))
-    (write-char quote-char port)))
+(define (write-xml-name name port)
+  (write-string (symbol-name name) port))
 
-(define (write-xml-external-id id indent port)
-  (if (xml-external-id-id id)
-      (begin
-       (write-indent indent port)
-       (write-string "PUBLIC " port)
-       (write-xml-string (xml-external-id-id id) port)
-       (write-indent indent port)
-       (write-xml-string (xml-external-id-uri id) port))
+(define (xml-name-columns name)
+  (utf8-string-length (symbol-name name)))
+
+(define (write-entity-value value indent port)
+  (if (xml-external-id? value)
+      (write-xml-external-id value indent port)
       (begin
-       (write-string "SYSTEM" port)
-       (write-string " " port)
-       (write-xml-string (xml-external-id-uri id) port))))
+       (write-char #\" port)
+       (for-each
+        (lambda (item)
+          (if (string? item)
+              (write-escaped-string item
+                                    '((#\" . "&quot;")
+                                      (#\& . "&amp;")
+                                      (#\% . "&#37;"))
+                                    port)
+              (write-xml item port)))
+        value)
+       (write-char #\" port))))
+
+(define (write-xml-external-id id indent port)
+  (let ((quoted-string
+        (lambda (string)
+          (write-char #\" port)
+          (write-xml-string string port)
+          (write-char #\" port))))
+    (if (xml-external-id-id id)
+       (begin
+         (write-indent indent port)
+         (write-string "PUBLIC " port)
+         (quoted-string (xml-external-id-id id))
+         (if (xml-external-id-uri id)
+             (begin
+               (write-indent indent port)
+               (quoted-string (xml-external-id-uri id)))))
+       (begin
+         (write-indent indent port)
+         (write-string "SYSTEM" port)
+         (write-string " " port)
+         (quoted-string (xml-external-id-uri id))))))
 
 (define (write-indent n port)
-  (newline port)
-  (let ((q.r (integer-divide n 8)))
-    (do ((i 0 (fix:+ i 1)))
-       ((fix:= i (car q.r)))
-      (write-char #\tab port))
-    (do ((i 0 (fix:+ i 1)))
-       ((fix:= i (cdr q.r)))
-      (write-char #\space port))))
\ No newline at end of file
+  (if n
+      (begin
+       (newline port)
+       (let ((q.r (integer-divide n 8)))
+         (do ((i 0 (fix:+ i 1)))
+             ((fix:= i (car q.r)))
+           (write-char #\tab port))
+         (do ((i 0 (fix:+ i 1)))
+             ((fix:= i (cdr q.r)))
+           (write-char #\space port))))
+      (write-char #\space port)))
+
+(define (write-escaped-string string escapes port)
+  (for-each-utf8-char string
+    (lambda (char)
+      (let ((e (assq char escapes)))
+       (if e
+           (write-string (cdr e) port)
+           (write-utf8-char char port))))))
+
+(define (for-each-utf8-char string procedure)
+  (let ((port (open-input-string string)))
+    (let loop ()
+      (let ((char (read-utf8-char port)))
+       (if (not (eof-object? char))
+           (begin
+             (procedure char)
+             (loop)))))))
\ No newline at end of file
index e156f5df53269df5dbafa08a1a7b9c52ae953f02..85e6c998beafe78f1831aefeb2a45b2235449fcb 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: xml-parser.scm,v 1.21 2003/02/14 18:28:38 cph Exp $
+$Id: xml-parser.scm,v 1.22 2003/03/01 16:53:16 cph Exp $
 
 Copyright 2001,2002,2003 Massachusetts Institute of Technology
 
@@ -50,38 +50,33 @@ USA.
                            "."))
         irritants))
 
+(define (coalesce-elements v)
+  (list->vector (coalesce-strings! (vector->list v))))
+
 (define (coalesce-strings! elements)
   (do ((elements elements (cdr elements)))
       ((not (pair? elements)))
-    (if (and (string? (car elements))
-            (pair? (cdr elements))
-            (string? (cadr elements)))
-       (begin
+    (if (string? (car elements))
+       (do ()
+           ((not (and (pair? (cdr elements))
+                      (string? (cadr elements)))))
          (set-car! elements
                    (string-append (car elements)
                                   (cadr elements)))
          (set-cdr! elements (cddr elements)))))
   elements)
 
-(define (coalesce-elements elements)
-  (if (there-exists? elements xml-uninterpreted?)
-      (make-xml-uninterpreted
-       (apply string-append
-             (map (lambda (element)
-                    (if (xml-uninterpreted? element)
-                        (xml-uninterpreted-text element)
-                        element))
-                  elements)))
-      (apply string-append elements)))
-
-(define (parse-coalesced-element parser elements description ptr)
-  (let ((value (coalesce-elements elements)))
-    (if (string? value)
-       (let ((v (parser (string->parser-buffer value))))
-         (if (not v)
-             (perror ptr (string-append "Malformed " description) value))
-         v)
-       (vector value))))
+(define (parse-coalesced-element parser v description ptr)
+  (let ((v (coalesce-elements v)))
+    (if (and (fix:= (vector-length v) 1)
+            (string? (vector-ref v 0)))
+       (let ((v* (parser (string->parser-buffer (vector-ref v 0)))))
+         (if (not v*)
+             (perror ptr
+                     (string-append "Malformed " description)
+                     (vector-ref v 0)))
+         v*)
+       v)))
 
 (define (string-parser description alphabet)
   (let ((a1 (alphabet- alphabet (string->alphabet "\"")))
@@ -182,15 +177,18 @@ USA.
   (xml-declaration-parser "XML text declaration" #f))
 
 (define (transform-declaration attributes allow-standalone? p)
+  (if (not (for-all? attributes
+            (lambda (attribute)
+              (and (pair? (cdr attribute))
+                   (string? (cadr attribute))
+                   (null? (cddr attribute))))))
+      (perror p "XML declaration can't contain entity refs" attributes))
   (let ((finish
         (lambda (version encoding standalone)
-          (if (not (and (string? version)
-                        (match-xml-version (string->parser-buffer version))))
+          (if (not (match-xml-version (string->parser-buffer version)))
               (perror p "Malformed XML version" version))
           (if (and encoding
-                   (not (and (string? encoding)
-                             (match-encoding
-                              (string->parser-buffer encoding)))))
+                   (not (match-encoding (string->parser-buffer encoding))))
               (perror p "Malformed encoding attribute" encoding))
           (if standalone
               (begin
@@ -203,14 +201,14 @@ USA.
           (make-xml-declaration version encoding standalone))))
     (let loop
        ((attributes attributes)
-        (names '("version" "encoding" "standalone"))
+        (names '(version encoding standalone))
         (results '()))
       (if (pair? names)
          (if (pair? attributes)
-             (if (string=? (symbol-name (caar attributes)) (car names))
+             (if (eq? (caar attributes) (car names))
                  (loop (cdr attributes)
                        (cdr names)
-                       (cons (cdar attributes) results))
+                       (cons (cadar attributes) results))
                  (loop attributes
                        (cdr names)
                        (cons #f results)))
@@ -254,7 +252,7 @@ USA.
                                       (vector-ref v 0) (vector-ref v* 0)))
                           (let ((contents
                                  (coalesce-strings!
-                                  (list-transform-negative
+                                  (delete-matching-items!
                                       (vector->list elements)
                                     (lambda (element)
                                       (and (string? element)
@@ -279,7 +277,7 @@ USA.
   (*parser
    (top-level
     (bracket "start tag"
-       (seq (noise (string "<")) parse-name)
+       (seq "<" parse-name)
        (match (alt (string ">") (string "/>")))
       parse-attribute-list))))
 
@@ -337,6 +335,34 @@ USA.
 
 (define parse-cdata-section            ;[18,19,20,21]
   (bracketed-region-parser "CDATA section" "<![CDATA[" "]]>"))
+
+;;;; Names
+
+(define parse-required-name
+  (*parser (require-success "Malformed XML name" parse-name)))
+
+(define parse-name                     ;[5]
+  (*parser (map xml-intern (match match-name))))
+
+(define (match-name buffer)
+  (and (match-utf8-char-in-alphabet buffer alphabet:name-initial)
+       (let loop ()
+        (if (match-utf8-char-in-alphabet buffer alphabet:name-subsequent)
+            (loop)
+            #t))))
+
+(define parse-required-name-token
+  (*parser (require-success "Malformed XML name token" parse-name-token)))
+
+(define parse-name-token               ;[7]
+  (*parser (map xml-intern (match match-name-token))))
+
+(define (match-name-token buffer)
+  (and (match-utf8-char-in-alphabet buffer alphabet:name-subsequent)
+       (let loop ()
+        (if (match-utf8-char-in-alphabet buffer alphabet:name-subsequent)
+            (loop)
+            #t))))
 \f
 ;;;; Processing instructions
 
@@ -392,54 +418,35 @@ USA.
         (xml-comment? object)
         (xml-processing-instructions? object)))))
 \f
-;;;; Names and references
-
-(define parse-required-name
-  (*parser (require-success "Malformed XML name" parse-name)))
-
-(define parse-name                     ;[5]
-  (*parser (map xml-intern (match match-name))))
-
-(define (match-name buffer)
-  (and (match-utf8-char-in-alphabet buffer alphabet:name-initial)
-       (let loop ()
-        (if (match-utf8-char-in-alphabet buffer alphabet:name-subsequent)
-            (loop)
-            #t))))
-
-(define parse-required-name-token
-  (*parser (require-success "Malformed XML name token" parse-name-token)))
-
-(define parse-name-token               ;[7]
-  (*parser (map xml-intern (match match-name-token))))
-
-(define (match-name-token buffer)
-  (and (match-utf8-char-in-alphabet buffer alphabet:name-subsequent)
-       (let loop ()
-        (if (match-utf8-char-in-alphabet buffer alphabet:name-subsequent)
-            (loop)
-            #t))))
+;;;; References
 
 (define parse-char-reference           ;[66]
   (let ((make-ref
         (lambda (s r p)
           (let ((n (string->number s r)))
-            (if (not (code-point-in-alphabet? n alphabet:xml-char))
-                (perror p "Disallowed Unicode code point" n))
-            (code-point->utf8-string n)))))
+            (if (not (unicode-code-point? n))
+                (perror p "Invalid code point" n))
+            (let ((char (integer->char n)))
+              (if (not (char-in-alphabet? char alphabet:xml-char))
+                  (perror p "Disallowed Unicode character" char))
+              (call-with-output-string
+                (lambda (port)
+                  (write-utf8-char char port))))))))
     (*parser
      (with-pointer p
        (sbracket "character reference" "&#" ";"
         (alt (map (lambda (s) (make-ref s 10 p))
                   (match (+ (alphabet alphabet:numeric))))
-             (seq (noise (string "x"))
+             (seq "x"
                   (map (lambda (s) (make-ref s 16 p))
                        (match (+ (char-set "0-9a-fA-f")))))))))))
 
 (define parse-reference                        ;[67]
   (*parser
    (alt parse-char-reference
-       parse-entity-reference)))
+       (with-pointer p
+         (transform (lambda (v) (dereference-entity (vector-ref v 0) #t p))
+           parse-entity-reference-name)))))
 
 (define parse-reference-deferred
   (*parser
@@ -451,12 +458,10 @@ USA.
              match-name)
         (string ";")))))
 
-(define parse-entity-reference         ;[68]
+(define parse-entity-reference-name    ;[68]
   (*parser
-   (with-pointer p
-     (transform (lambda (v) (dereference-entity (vector-ref v 0) p))
-       (sbracket "entity reference" "&" ";"
-        parse-required-name)))))
+   (sbracket "entity reference" "&" ";"
+     parse-required-name)))
 
 (define parse-entity-reference-deferred
   (*parser (match (seq (string "&") match-name (string ";")))))
@@ -490,8 +495,7 @@ USA.
      (seq S
          parse-name
          S?
-         (require-success "Missing attribute separator"
-           (noise (string "=")))
+         (require-success "Missing attribute separator" "=")
          S?
          parse-attribute-value))))
 
@@ -499,7 +503,11 @@ USA.
   (let ((a1 (alphabet- alphabet (string->alphabet "\"")))
        (a2 (alphabet- alphabet (string->alphabet "'"))))
     (*parser
-     (encapsulate (lambda (v) (coalesce-elements (vector->list v)))
+     (encapsulate (lambda (v)
+                   (let ((elements (vector->list v)))
+                     (if (null? elements)
+                         (list "")
+                         (coalesce-strings! elements))))
        (alt (sbracket "attribute value" "\"" "\""
              (* (alt (match (+ (alphabet a1)))
                      parse-reference)))
@@ -520,39 +528,67 @@ USA.
         (attribute-value-parser alphabet:char-data
                                 parse-reference-deferred)))
     (*parser
-     (with-pointer p
-       (map (lambda (value) (normalize-attribute-value value p))
-           (require-success "Malformed attribute value"
-             parser))))))
+     (map normalize-attribute-value
+         (require-success "Malformed attribute value"
+           parser)))))
 \f
 ;;;; Normalization
 
-(define (normalize-attribute-value value p)
-  (call-with-output-string
-   (lambda (port)
-     (let normalize-value ((value value))
-       (if (string? value)
-          (let ((buffer
-                 (string->parser-buffer (normalize-line-endings value))))
-            (let loop ()
-              (let ((char (peek-parser-buffer-char buffer)))
-                (cond ((not char)
-                       unspecific)
-                      ((or (char=? char #\tab)
-                           (char=? char #\newline))
-                       (write-char #\space port)
-                       (read-parser-buffer-char buffer)
-                       (loop))
-                      ((char=? char #\&)
-                       (normalize-value
-                        (vector-ref (parse-reference buffer)
-                                    0))
-                       (loop))
-                      (else
-                       (write-char char port)
-                       (read-parser-buffer-char buffer)
-                       (loop))))))
-          (perror p "Reference to external entity in attribute"))))))
+(define (normalize-attribute-value elements)
+  ;; The spec also says that non-CDATA values must have further
+  ;; processing: leading and trailing spaces are removed, and
+  ;; sequences of spaces are collapsed.
+  (coalesce-strings!
+   (reverse!
+    (let loop ((elements elements) (result '()))
+      (if (pair? elements)
+         (let ((element (car elements))
+               (elements (cdr elements)))
+           (if (string? element)
+               (let ((buffer
+                      (string->parser-buffer
+                       (normalize-line-endings element))))
+                 (let normalize-string
+                     ((port (open-output-string))
+                      (result result))
+                   (let* ((p (get-parser-buffer-pointer buffer))
+                          (char (read-parser-buffer-char buffer)))
+                     (case char
+                       ((#f)
+                        (loop elements
+                              (cons (get-output-string port) result)))
+                       ((#\tab #\newline #\return)
+                        (write-char #\space port)
+                        (normalize-string port result))
+                       ((#\&)
+                        (set-parser-buffer-pointer! buffer p)
+                        (let ((v (parse-char-reference buffer)))
+                          (if v
+                              (begin
+                                (write-string (vector-ref v 0) port)
+                                (normalize-string port result))
+                              (normalize-string
+                               (open-output-string)
+                               (let ((name
+                                      (vector-ref
+                                       (parse-entity-reference-name buffer)
+                                       0))
+                                     (result
+                                      (cons (get-output-string port) result)))
+                                 (let ((value
+                                        (vector-ref
+                                         (dereference-entity name #f p)
+                                         0)))
+                                   (if (string? value)
+                                       (expand-entity-value name p
+                                         (lambda ()
+                                           (loop (list value) result)))
+                                       (cons value result))))))))
+                       (else
+                        (write-char char port)
+                        (normalize-string port result))))))
+               (loop elements (cons element result))))
+         result)))))
 
 (define (trim-attribute-whitespace string)
   (call-with-output-string
@@ -636,19 +672,18 @@ USA.
     entity))
 
 (define (dereference-parameter-entity name)
-  (let ((value
+  (let ((elements
         (and (not (eq? *parameter-entities* 'STOP))
              (let ((entity (find-parameter-entity name)))
                (and entity
                     (xml-parameter-!entity-value entity))))))
-    (if (or (string? value)
-           (xml-uninterpreted? value))
-       value
+    (if (and (string? (car elements))
+            (null? (cdr elements)))
+       (car elements)
        (begin
          (set! *parameter-entities* 'STOP)
          (set! *general-entities* 'STOP)
-         (make-xml-uninterpreted
-          (string-append "%" (symbol-name name) ";"))))))
+         (make-xml-parameter-entity-ref name)))))
 
 (define (find-parameter-entity name)
   (let loop ((entities *parameter-entities*))
@@ -661,9 +696,9 @@ USA.
 \f
 ;;;; General parsed entities
 
-(define (dereference-entity name p)
+(define (dereference-entity name expand? p)
   (if (eq? *general-entities* 'STOP)
-      (uninterpreted-entity name)
+      (vector (make-xml-entity-ref name))
       (begin
        (if (assq name *entity-expansion-nesting*)
            (perror p "Circular entity reference" name))
@@ -672,24 +707,32 @@ USA.
              (begin
                (if (xml-unparsed-!entity? entity)
                    (perror p "Reference to unparsed entity" name))
-               (let ((value (xml-!entity-value entity)))
-                 (cond ((string? value) (expand-entity-value name value p))
-                       ((xml-uninterpreted? value) (vector value))
-                       (else (uninterpreted-entity name)))))
+               (let ((elements (xml-!entity-value entity)))
+                 (if (and (string? (car elements))
+                          (null? (cdr elements)))
+                     (if expand?
+                         (expand-entity-value-string name (car elements) p)
+                         (vector (car elements)))
+                     (vector (make-xml-entity-ref name)))))
              (begin
                (if (or *standalone?* *internal-dtd?*)
                    (perror p "Reference to undefined entity" name))
-               (uninterpreted-entity name)))))))
-
-(define (expand-entity-value name value p)
-  (let ((buffer (string->parser-buffer value)))
-    (let ((v
-          (fluid-let ((*entity-expansion-nesting*
-                       (cons (cons name p) *entity-expansion-nesting*)))
-            (parse-content buffer))))
-      (if (or (not v) (peek-parser-buffer-char buffer))
-         (perror p "Malformed entity reference" value))
-      v)))
+               (vector (make-xml-entity-ref name))))))))
+
+(define (expand-entity-value-string name string p)
+  (let ((v
+        (expand-entity-value name p
+          (lambda ()
+            ((*parser (complete parse-content))
+             (string->parser-buffer string))))))
+    (if (not v)
+       (perror p "Malformed entity reference" string))
+    v))
+
+(define (expand-entity-value name p thunk)
+  (fluid-let ((*entity-expansion-nesting*
+              (cons (cons name p) *entity-expansion-nesting*)))
+    (thunk)))
 
 (define (find-entity name)
   (let loop ((entities *general-entities*))
@@ -701,15 +744,12 @@ USA.
             (car entities)
             (loop (cdr entities))))))
 
-(define (uninterpreted-entity name)
-  (vector (make-xml-uninterpreted (string-append "&" (symbol-name name) ";"))))
-
 (define (predefined-entities)
-  (list (make-xml-!entity (xml-intern "lt") "&#60;")
-       (make-xml-!entity (xml-intern "gt") ">")
-       (make-xml-!entity (xml-intern "amp") "&#38;")
-       (make-xml-!entity (xml-intern "quot") "\"")
-       (make-xml-!entity (xml-intern "apos") "'")))
+  (list (make-xml-!entity 'lt '("&#60;"))
+       (make-xml-!entity 'gt '(">"))
+       (make-xml-!entity 'amp '("&#38;"))
+       (make-xml-!entity 'quot '("\""))
+       (make-xml-!entity 'apos '("'"))))
 
 (define *general-entities*)
 (define *entity-expansion-nesting* '())
@@ -759,11 +799,15 @@ USA.
    (alt (with-pointer p
          (transform
              (lambda (v)
-               (parse-coalesced-element parse-external-subset-decl
-                                        (list " " (vector-ref v 0) " ")
-                                        "parameter-entity value"
-                                        p))
-              parse-parameter-entity-reference))
+               (let ((value (vector-ref v 0)))
+                 (if (string? value)
+                     (parse-coalesced-element parse-external-subset-decl
+                                              (vector
+                                               (string-append " " value " "))
+                                              "parameter-entity value"
+                                              p)
+                     v)))
+           parse-parameter-entity-reference))
        S)))
 
 (define parse-internal-markup-decl     ;[29]
@@ -784,16 +828,10 @@ USA.
                  S?
                  (alt (encapsulate (lambda (v) (cons 'ALT (vector->list v)))
                         (seq parse-cp
-                             (+ (seq S?
-                                     (noise (string "|"))
-                                     S?
-                                     parse-cp))))
+                             (+ (seq S? "|" S? parse-cp))))
                       (encapsulate (lambda (v) (cons 'SEQ (vector->list v)))
                         (seq parse-cp
-                             (* (seq S?
-                                     (noise (string ","))
-                                     S?
-                                     parse-cp)))))
+                             (* (seq S? "," S? parse-cp)))))
                  S?)
                (? (match (char-set "?*+")))))))
 
@@ -819,22 +857,18 @@ USA.
         parse-required-name
         S
         ;;[46]
-        (alt (map xml-intern (match (string "EMPTY")))
-             (map xml-intern (match (string "ANY")))
+        (alt (map intern (match (string "EMPTY")))
+             (map intern (match (string "ANY")))
              ;;[51]
              (encapsulate (lambda (v) (cons 'MIX (vector->list v)))
                (with-pointer p
-                 (seq (noise (string "("))
+                 (seq "("
                       S?
-                      (noise (string "#PCDATA"))
-                      (alt (seq S?
-                                (noise (string ")")))
-                           (seq (* (seq S?
-                                        (noise (string "|"))
-                                        S?
-                                        parse-required-name))
+                      "#PCDATA"
+                      (alt (seq S? ")")
+                           (seq (* (seq S? "|" S? parse-required-name))
                                 S?
-                                (noise (string ")*")))
+                                ")*")
 
                            (sexp
                             (lambda (buffer)
@@ -856,7 +890,7 @@ USA.
                        (type (vector-ref v 1))
                        (default (vector-ref v 2)))
                    (list name type
-                         (if (and (not (eq? type (xml-intern "CDATA")))
+                         (if (and (not (eq? type 'CDATA))
                                   (pair? default))
                              (list (car default)
                                    (trim-attribute-whitespace (cadr default)))
@@ -864,54 +898,53 @@ USA.
              (seq S
                   parse-name
                   S
-                  ;;[54,57]
-                  (alt (map xml-intern
-                            ;;[55,56]
-                            (alt (match (string "CDATA"))
-                                 (match (string "IDREFS"))
-                                 (match (string "IDREF"))
-                                 (match (string "ID"))
-                                 (match (string "ENTITY"))
-                                 (match (string "ENTITIES"))
-                                 (match (string "NMTOKENS"))
-                                 (match (string "NMTOKEN"))))
-                       ;;[58]
-                       (encapsulate
-                           (lambda (v)
-                             (cons 'NOTATION (vector->list v)))
-                         (bracket "notation type"
-                             (noise (seq (string "NOTATION") S (string "(")))
-                             (noise (string ")"))
-                           S?
-                           parse-required-name
-                           (* (seq (noise (seq S? (string "|") S?))
-                                   parse-required-name))
-                           S?))
-                       ;;[59]
-                       (encapsulate
-                           (lambda (v)
-                             (cons 'ENUMERATED (vector->list v)))
-                         (sbracket "enumerated type" "(" ")"
-                           S?
-                           parse-required-name-token
-                           (* (seq S?
-                                   (noise (string "|"))
-                                   S?
-                                   parse-required-name-token))
-                           S?)))
+                  parse-!attlist-type
                   S
-                  ;;[60]
-                  (alt (map xml-intern
-                            (alt (match (string "#REQUIRED"))
-                                 (match (string "#IMPLIED"))))
-                       (encapsulate vector->list
-                         (seq (map xml-intern
-                                   (match (string "#FIXED")))
-                              S
-                              parse-attribute-value))
-                       (map (lambda (v) (list 'DEFAULT v))
-                            parse-attribute-value))))))
+                  parse-!attlist-default))))
        S?))))
+
+(define parse-!attlist-type            ;[54,57]
+  (*parser
+   (alt (map intern
+            ;;[55,56]
+            (alt (match (string "CDATA"))
+                 (match (string "IDREFS"))
+                 (match (string "IDREF"))
+                 (match (string "ID"))
+                 (match (string "ENTITY"))
+                 (match (string "ENTITIES"))
+                 (match (string "NMTOKENS"))
+                 (match (string "NMTOKEN"))))
+       ;;[58]
+       (encapsulate (lambda (v) (cons 'NOTATION (vector->list v)))
+         (bracket "notation type"
+             (noise (seq (string "NOTATION") S (string "(")))
+             ")"
+           S?
+           parse-required-name
+           (* (seq S? "|" S? parse-required-name))
+           S?))
+       ;;[59]
+       (encapsulate (lambda (v) (cons 'ENUMERATED (vector->list v)))
+         (sbracket "enumerated type" "(" ")"
+           S?
+           parse-required-name-token
+           (* (seq S? "|" S? parse-required-name-token))
+           S?)))))
+
+(define parse-!attlist-default         ;[60]
+  (*parser
+   (alt (seq "#"
+            (map intern
+                 (alt (match (string "REQUIRED"))
+                      (match (string "IMPLIED")))))
+       (encapsulate (lambda (v) (cons (vector-ref v 0) (vector-ref v 1)))
+         (seq "#"
+              (map intern (match (string "FIXED")))
+              S
+              parse-attribute-value))
+       (encapsulate (lambda (v) (cons 'DEFAULT (vector-ref v 0)))
+         parse-attribute-value))))
 \f
 (define parse-!entity                  ;[70,71,72,73,74,76]
   (*parser
@@ -920,7 +953,7 @@ USA.
      (alt (encapsulate
              (lambda (v)
                (make-parameter-entity (vector-ref v 0) (vector-ref v 1)))
-           (seq (noise (string "%"))
+           (seq "%"
                 S
                 parse-required-name
                 S
@@ -937,10 +970,7 @@ USA.
                 S
                 (alt parse-entity-value
                      (seq parse-external-id
-                          (? (seq S
-                                  (noise (string "NDATA"))
-                                  S
-                                  parse-required-name)))))))
+                          (? (seq S "NDATA" S parse-required-name)))))))
      S?)))
 
 (define parse-!notation                        ;[82,83]
@@ -954,7 +984,7 @@ USA.
        (alt parse-external-id
            (encapsulate
                (lambda (v) (make-xml-external-id (vector-ref v 0) #f))
-             (seq (noise (string "PUBLIC"))
+             (seq "PUBLIC"
                   S
                   parse-public-id-literal)))
        S?))))
@@ -965,13 +995,13 @@ USA.
      (alt (encapsulate
              (lambda (v)
                (make-external-id #f (vector-ref v 0) p))
-           (seq (noise (string "SYSTEM"))
+           (seq "SYSTEM"
                 S
                 parse-system-literal))
          (encapsulate
              (lambda (v)
                (make-external-id (vector-ref v 0) (vector-ref v 1) p))
-           (seq (noise (string "PUBLIC"))
+           (seq "PUBLIC"
                 S
                 parse-public-id-literal
                 S
@@ -1008,10 +1038,7 @@ USA.
        (with-pointer p
         (transform
             (lambda (v)
-              (parse-coalesced-element parse-decl
-                                       (vector->list v)
-                                       "markup declaration"
-                                       p))
+              (parse-coalesced-element parse-decl v "markup declaration" p))
           (seq
            (match prefix)
            (require-success "Malformed markup declaration"
@@ -1096,7 +1123,7 @@ USA.
      (transform
         (lambda (v)
           (parse-coalesced-element parse-conditional-section
-                                   (vector->list v)
+                                   v
                                    "conditional section"
                                    p))
        (bracket "parameterized conditional section"
index 57f65f34999d818f73226cda629b1564f29934d7..e908e7f94e3decf6887599eb75ee3998b4c9bb46 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: xml-struct.scm,v 1.9 2003/02/14 18:28:38 cph Exp $
+$Id: xml-struct.scm,v 1.10 2003/03/01 16:53:39 cph Exp $
 
 Copyright 2001,2002,2003 Massachusetts Institute of Technology
 
@@ -27,134 +27,341 @@ USA.
 
 (declare (usual-integrations))
 \f
-(define-structure (xml-document
-                  (type-descriptor xml-document-rtd))
-  declaration
-  misc-1
-  dtd
-  misc-2
-  root
-  misc-3)
-
-(define-structure (xml-declaration
-                  (type-descriptor xml-declaration-rtd))
-  version
-  encoding
-  standalone)
-
-(define-structure (xml-element
-                  (type-descriptor xml-element-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-ELEMENT
-                     (lambda (element port)
-                       (write-char #\space port)
-                       (write (xml-element-name element) port)))))
-  name
-  attributes
-  contents)
-
-(define-structure (xml-comment
-                  (type-descriptor xml-comment-rtd))
-  text)
-
-(define-structure (xml-processing-instructions
-                  (type-descriptor xml-processing-instructions-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-PROCESSING-INSTRUCTIONS
-                     (lambda (element port)
-                       (write-char #\space port)
-                       (write (xml-processing-instructions-name element)
-                              port)))))
-  name
-  text)
-
-(define-structure (xml-uninterpreted
-                  (type-descriptor xml-uninterpreted-rtd))
-  text)
-
 (define (xml-intern name)
+  (if (not (and (string? name) (string-is-xml-nmtoken? name)))
+      (error:wrong-type-argument name "XML nmtoken string" 'XML-INTERN))
   (string->symbol name))
 
-(define-structure (xml-dtd
-                  (type-descriptor xml-dtd-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-DTD
-                     (lambda (dtd port)
-                       (write-char #\space port)
-                       (write (xml-dtd-root dtd) port)))))
-  root
-  external
-  internal)
-
-(define-structure (xml-external-id
-                  (type-descriptor xml-external-id-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-EXTERNAL-ID
-                     (lambda (dtd port)
-                       (write-char #\space port)
-                       (write (or (xml-external-id-id dtd)
-                                  (xml-external-id-uri dtd))
-                              port)))))
-  id
-  uri)
+(define (xml-name? object)
+  (and (symbol? object)
+       (string-is-xml-name? (symbol-name object))))
+
+(define (xml-nmtoken? object)
+  (and (symbol? object)
+       (string-is-xml-nmtoken? (symbol-name object))))
+
+(define (string-is-xml-name? string)
+  (let ((buffer (string->parser-buffer string)))
+    (and (match-utf8-char-in-alphabet buffer alphabet:name-initial)
+        (let loop ()
+          (if (peek-parser-buffer-char buffer)
+              (and (match-utf8-char-in-alphabet buffer
+                                                alphabet:name-subsequent)
+                   (loop))
+              #t)))))
+
+(define (string-is-xml-nmtoken? string)
+  (let ((buffer (string->parser-buffer string)))
+    (let loop ()
+      (and (match-utf8-char-in-alphabet buffer alphabet:name-subsequent)
+          (if (peek-parser-buffer-char buffer)
+              (loop)
+              #t)))))
+
+(define (xml-whitespace-string? object)
+  (string-composed-of? object char-set:xml-whitespace))
+
+(define (string-composed-of? string char-set)
+  (and (string? string)
+       (substring-composed-of? string 0 (string-length string) char-set)))
+
+(define (substring-composed-of? string start end char-set)
+  (let loop ((index start))
+    (or (fix:= index end)
+       (and (char-set-member? char-set (string-ref string index))
+            (loop (fix:+ index 1))))))
 \f
-(define-structure (xml-!element
-                  (type-descriptor xml-!element-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-!ELEMENT
-                     (lambda (element port)
-                       (write-char #\space port)
-                       (write (xml-!element-name element) port)))))
-  name
-  content-type)
-
-(define-structure (xml-!attlist
-                  (type-descriptor xml-!attlist-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-!ATTLIST
-                     (lambda (element port)
-                       (write-char #\space port)
-                       (write (xml-!attlist-name element) port)))))
-  name
-  definitions)
-
-(define-structure (xml-!entity
-                  (type-descriptor xml-!entity-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-!ENTITY
-                     (lambda (element port)
-                       (write-char #\space port)
-                       (write (xml-!entity-name element) port)))))
-  name
-  value)
-
-(define-structure (xml-unparsed-!entity
-                  (type-descriptor xml-unparsed-!entity-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-UNPARSED-!ENTITY
-                     (lambda (element port)
-                       (write-char #\space port)
-                       (write (xml-unparsed-!entity-name element) port)))))
-  name
-  id
-  notation)
-
-(define-structure (xml-parameter-!entity
-                  (type-descriptor xml-parameter-!entity-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-PARAMETER-!ENTITY
-                     (lambda (element port)
-                       (write-char #\space port)
-                       (write (xml-parameter-!entity-name element) port)))))
-  name
-  value)
-
-(define-structure (xml-!notation
-                  (type-descriptor xml-!notation-rtd)
-                  (print-procedure
-                   (standard-unparser-method 'XML-!NOTATION
-                     (lambda (element port)
-                       (write-char #\space port)
-                       (write (xml-!notation-name element) port)))))
-  name
-  id)
\ No newline at end of file
+(define-syntax define-xml-type
+  (sc-macro-transformer
+   (lambda (form environment)
+     (if (syntax-match? '(IDENTIFIER * (IDENTIFIER EXPRESSION)) (cdr form))
+        (let ((root (symbol-append 'XML- (cadr form)))
+              (slots (cddr form)))
+          (let ((rtd (symbol-append root '-RTD))
+                (constructor (symbol-append 'MAKE- root))
+                (slot-vars
+                 (map (lambda (slot)
+                        (close-syntax (car slot) environment))
+                      slots)))
+            (let ((test
+                   (lambda (slot var name)
+                     `(IF (NOT (,(close-syntax (cadr slot) environment) ,var))
+                          (ERROR:WRONG-TYPE-ARGUMENT
+                           ,var ,(symbol->string (car slot)) ',name)))))
+              `(BEGIN
+                 (DEFINE ,rtd
+                   (MAKE-RECORD-TYPE ',root '(,@(map car slots))))
+                 (DEFINE ,(symbol-append root '?)
+                   (RECORD-PREDICATE ,rtd))
+                 (DEFINE ,constructor
+                   (LET ((CONSTRUCTOR
+                          (RECORD-CONSTRUCTOR ,rtd '(,@(map car slots)))))
+                     (NAMED-LAMBDA (,constructor ,@slot-vars)
+                       ,@(map (lambda (slot var) (test slot var constructor))
+                              slots slot-vars)
+                       (CONSTRUCTOR ,@slot-vars))))
+                 ,@(map (lambda (slot var)
+                          (let* ((accessor (symbol-append root '- (car slot)))
+                                 (modifier (symbol-append 'SET- accessor '!)))
+                            `(BEGIN
+                               (DEFINE ,accessor
+                                 (RECORD-ACCESSOR ,rtd ',(car slot)))
+                               (DEFINE ,modifier
+                                 (LET ((MODIFIER
+                                        (RECORD-MODIFIER ,rtd ',(car slot))))
+                                   (NAMED-LAMBDA (,modifier OBJECT ,var)
+                                     ,(test slot var modifier)
+                                     (MODIFIER OBJECT ,var)))))))
+                        slots
+                        slot-vars)))))
+        (ill-formed-syntax form)))))
+\f
+(define-xml-type document
+  (declaration (lambda (object) (or (not object) (xml-declaration? object))))
+  (misc-1 misc-arg?)
+  (dtd (lambda (object) (or (not object) (xml-dtd? object))))
+  (misc-2 misc-arg?)
+  (root xml-element?)
+  (misc-3 misc-arg?))
+
+(define (misc-arg? object)
+  (list-of-type? object
+    (lambda (object)
+      (or (xml-comment? object)
+         (xml-whitespace-string? object)
+         (xml-processing-instructions? object)))))
+
+(define-xml-type declaration
+  (version xml-version?)
+  (encoding xml-encoding?)
+  (standalone (lambda (object) (member object '(#f "yes" "no")))))
+
+(define (xml-version? object)
+  (and (string-composed-of? object char-set:xml-version)
+       (fix:> (string-length object) 0)))
+
+(define char-set:xml-version
+  (char-set-union char-set:alphanumeric
+                 (string->char-set "_.:-")))
+
+(define (xml-encoding? object)
+  (or (not object)
+      (and (string? object)
+          (let ((end (string-length object)))
+            (and (fix:> end 0)
+                 (char-alphabetic? (string-ref object 0))
+                 (substring-composed-of? object 1 end
+                                         char-set:xml-encoding))))))
+
+(define char-set:xml-encoding
+  (char-set-union char-set:alphanumeric
+                 (string->char-set "_.-")))
+
+(define-xml-type element
+  (name xml-name?)
+  (attributes
+   (lambda (object)
+     (list-of-type? object
+       (lambda (object)
+        (and (pair? object)
+             (xml-name? (car object))
+             (attribute-value? (cdr object)))))))
+  (contents
+   (lambda (object)
+     (list-of-type? object
+       (lambda (object)
+        (or (string? object)
+            (xml-comment? object)
+            (xml-element? object)
+            (xml-processing-instructions? object)
+            (xml-entity-ref? object)))))))
+
+(define (attribute-value? object)
+  (and (pair? object)
+       (list-of-type? object
+        (lambda (object)
+          (or (string? object)
+              (xml-entity-ref? object))))))
+
+(define-xml-type comment
+  (text string?))
+
+(define-xml-type processing-instructions
+  (name
+   (lambda (object)
+     (and (xml-name? object)
+         (not (string-ci=? "xml" (symbol-name object))))))
+  (text string?))
+\f
+(define-xml-type dtd
+  (root xml-name?)
+  (external
+   (lambda (object)
+     (or (not object)
+        (xml-external-id? object))))
+  (internal
+   (lambda (object)
+     (list-of-type? object
+       (lambda (object)
+        (or (xml-whitespace-string? object)
+            (xml-comment? object)
+            (xml-!element? object)
+            (xml-!attlist? object)
+            (xml-!entity? object)
+            (xml-unparsed-!entity? object)
+            (xml-parameter-!entity? object)
+            (xml-!notation? object)
+            (xml-parameter-entity-ref? object)))))))
+
+(define-xml-type external-id
+  (id
+   (lambda (object)
+     (or (not object)
+        (public-id? object))))
+  (uri
+   (lambda (object)
+     (or (not object)
+        (string? object)))))
+
+(define (public-id? object)
+  (string-composed-of? object char-set:xml-public-id))
+
+(define char-set:xml-public-id
+  (char-set-union char-set:alphanumeric
+                 (string->char-set " \r\n-'()+,./:=?;!*#@$_%")))
+
+(define-xml-type !element
+  (name xml-name?)
+  (content-type
+   (lambda (object)
+     (or (eq? object 'EMPTY)
+        (eq? object 'ANY)
+        (and (pair? object)
+             (eq? 'MIX (car object))
+             (list-of-type? (cdr object) xml-name?))
+        (letrec
+            ((children?
+              (lambda (object)
+                (maybe-wrapped object
+                  (lambda (object)
+                    (and (pair? object)
+                         (or (eq? 'ALT (car object))
+                             (eq? 'SEQ (car object)))
+                         (list-of-type? (cdr object) cp?))))))
+             (cp?
+              (lambda (object)
+                (or (maybe-wrapped object xml-name?)
+                    (children? object))))
+             (maybe-wrapped
+              (lambda (object pred)
+                (or (pred object)
+                    (and (pair? object)
+                         (or (eq? #\? (car object))
+                             (eq? #\* (car object))
+                             (eq? #\+ (car object)))
+                         (pair? (cdr object))
+                         (pred (cadr object))
+                         (null? (cddr object)))))))
+          (children? object))))))
+\f
+(define-xml-type !attlist
+  (name xml-name?)
+  (definitions
+    (lambda (object)
+      (list-of-type? object
+       (lambda (item)
+         (and (pair? item)
+              (xml-name? (car item))
+              (pair? (cdr item))
+              (!attlist-type? (cadr item))
+              (pair? (cddr item))
+              (!attlist-default? (caddr item))
+              (null? (cdddr item))))))))
+
+(define (!attlist-type? object)
+  (or (eq? object 'CDATA)
+      (eq? object 'IDREFS)
+      (eq? object 'IDREF)
+      (eq? object 'ID)
+      (eq? object 'ENTITY)
+      (eq? object 'ENTITIES)
+      (eq? object 'NMTOKENS)
+      (eq? object 'NMTOKEN)
+      (and (pair? object)
+          (eq? 'NOTATION (car object))
+          (list-of-type? (cdr object) xml-name?))
+      (and (pair? object)
+          (eq? 'ENUMERATED (car object))
+          (list-of-type? (cdr object) xml-nmtoken?))))
+
+(define (!attlist-default? object)
+  (or (eq? object 'REQUIRED)
+      (eq? object 'IMPLIED)
+      (and (pair? object)
+          (eq? 'FIXED (car object))
+          (attribute-value? (cdr object)))
+      (and (pair? object)
+          (eq? 'DEFAULT (car object))
+          (attribute-value? (cdr object)))))
+
+(define-xml-type !entity
+  (name xml-name?)
+  (value entity-value?))
+
+(define-xml-type unparsed-!entity
+  (name xml-name?)
+  (id xml-external-id?)
+  (notation xml-name?))
+
+(define-xml-type parameter-!entity
+  (name xml-name?)
+  (value entity-value?))
+
+(define (entity-value? object)
+  (or (and (pair? object)
+          (list-of-type? object
+            (lambda (object)
+              (or (string? object)
+                  (xml-entity-ref? object)
+                  (xml-parameter-entity-ref? object)))))
+      (xml-external-id? object)))
+
+(define-xml-type !notation
+  (name xml-name?)
+  (id xml-external-id?))
+
+(define-xml-type entity-ref
+  (name xml-name?))
+
+(define-xml-type parameter-entity-ref
+  (name xml-name?))
+\f
+(define-syntax define-xml-printer
+  (sc-macro-transformer
+   (lambda (form environment)
+     (if (syntax-match? '(IDENTIFIER EXPRESSION) (cdr form))
+        (let ((name (cadr form))
+              (accessor (caddr form)))
+          (let ((root (symbol-append 'XML- name)))
+            `(SET-RECORD-TYPE-UNPARSER-METHOD!
+              ,(close-syntax (symbol-append root '-RTD) environment)
+              (STANDARD-UNPARSER-METHOD ',root
+                (LAMBDA (,name PORT)
+                  (WRITE-CHAR #\SPACE PORT)
+                  (WRITE (,(close-syntax accessor environment) ,name)
+                         PORT))))))
+        (ill-formed-syntax form)))))
+
+(define-xml-printer element xml-element-name)
+(define-xml-printer processing-instructions xml-processing-instructions-name)
+(define-xml-printer dtd xml-dtd-root)
+(define-xml-printer external-id
+  (lambda (dtd)
+    (or (xml-external-id-id dtd)
+       (xml-external-id-uri dtd))))
+(define-xml-printer !element xml-!element-name)
+(define-xml-printer !attlist xml-!attlist-name)
+(define-xml-printer !entity xml-!entity-name)
+(define-xml-printer unparsed-!entity xml-unparsed-!entity-name)
+(define-xml-printer parameter-!entity xml-parameter-!entity-name)
+(define-xml-printer !notation xml-!notation-name)
\ No newline at end of file
index 36a4ff117c8094f8f5d21d9f9753cdabc82139fc..e46327b5e431341a0f63acfd8b8bacabae8bf6fe 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: xml.pkg,v 1.16 2003/02/14 18:28:38 cph Exp $
+$Id: xml.pkg,v 1.17 2003/03/01 16:52:30 cph Exp $
 
 Copyright 2001,2002,2003 Massachusetts Institute of Technology
 
@@ -45,10 +45,11 @@ USA.
          make-xml-document
          make-xml-dtd
          make-xml-element
+         make-xml-entity-ref
          make-xml-external-id
          make-xml-parameter-!entity
+         make-xml-parameter-entity-ref
          make-xml-processing-instructions
-         make-xml-uninterpreted
          make-xml-unparsed-!entity
          set-xml-!attlist-definitions!
          set-xml-!attlist-name!
@@ -74,13 +75,14 @@ USA.
          set-xml-element-attributes!
          set-xml-element-contents!
          set-xml-element-name!
+         set-xml-entity-ref-name!
          set-xml-external-id-id!
          set-xml-external-id-uri!
          set-xml-parameter-!entity-name!
          set-xml-parameter-!entity-value!
+         set-xml-parameter-entity-ref-name!
          set-xml-processing-instructions-name!
          set-xml-processing-instructions-text!
-         set-xml-uninterpreted-text!
          set-xml-unparsed-!entity-id!
          set-xml-unparsed-!entity-name!
          set-xml-unparsed-!entity-notation!
@@ -126,27 +128,33 @@ USA.
          xml-element-name
          xml-element-rtd
          xml-element?
+         xml-entity-ref-name
+         xml-entity-ref-rtd
+         xml-entity-ref?
          xml-external-id-id
          xml-external-id-rtd
          xml-external-id-uri
          xml-external-id?
          xml-intern
+         xml-name?
+         xml-nmtoken?
          xml-parameter-!entity-name
          xml-parameter-!entity-rtd
          xml-parameter-!entity-value
          xml-parameter-!entity?
+         xml-parameter-entity-ref-name
+         xml-parameter-entity-ref-rtd
+         xml-parameter-entity-ref?
          xml-processing-instructions-name
          xml-processing-instructions-rtd
          xml-processing-instructions-text
          xml-processing-instructions?
-         xml-uninterpreted-rtd
-         xml-uninterpreted-text
-         xml-uninterpreted?
          xml-unparsed-!entity-id
          xml-unparsed-!entity-name
          xml-unparsed-!entity-notation
          xml-unparsed-!entity-rtd
-         xml-unparsed-!entity?))
+         xml-unparsed-!entity?
+         xml-whitespace-string?))
 
 (define-package (runtime xml parser)
   (files "xml-chars" "xml-parser")
@@ -157,7 +165,10 @@ USA.
          read-xml
          read-xml-file
          string->xml
-         substring->xml))
+         substring->xml)
+  (export (runtime xml structure)
+         alphabet:name-initial
+         alphabet:name-subsequent))
 
 (define-package (runtime xml output)
   (files "xml-output")