Reorganize for presentation.
authorChris Hanson <org/chris-hanson/cph>
Tue, 10 Jul 2001 19:34:32 +0000 (19:34 +0000)
committerChris Hanson <org/chris-hanson/cph>
Tue, 10 Jul 2001 19:34:32 +0000 (19:34 +0000)
v7/src/xml/xml-parser.scm

index 82e3b9863dfaedc331bc888c957b426f70f63af5..dc65014da1858f6c288ad9e3c8439529f9f9af4e 100644 (file)
@@ -1,6 +1,6 @@
 ;;; -*-Scheme-*-
 ;;;
-;;; $Id: xml-parser.scm,v 1.5 2001/07/10 17:50:17 cph Exp $
+;;; $Id: xml-parser.scm,v 1.6 2001/07/10 19:34:32 cph Exp $
 ;;;
 ;;; Copyright (c) 2001 Massachusetts Institute of Technology
 ;;;
 \f
 ;;;; Normalization
 
+(define (normalize-attribute-value value p)
+  (with-string-output-port
+    (lambda (port)
+      (let normalize-value ((value value))
+       (if (string? value)
+           (let ((buffer (string->parser-buffer value)))
+             (let loop ()
+               (let ((char (peek-parser-buffer-char buffer)))
+                 (cond ((not char)
+                        unspecific)
+                       ((or (char=? char #\tab)
+                            (char=? char #\newline))
+                        (write-char #\space port)
+                        (read-parser-buffer-char buffer)
+                        (loop))
+                       ((char=? char #\&)
+                        (normalize-value
+                         (vector-ref (parse-reference buffer)
+                                     0))
+                        (loop))
+                       (else
+                        (write-char char port)
+                        (read-parser-buffer-char buffer)
+                        (loop))))))
+           (perror p "Reference to external entity in attribute"))))))
+
 (define (normalize-line-endings string #!optional always-copy?)
   (if (string-find-next-char string #\return)
       (let ((end (string-length string)))
               always-copy?)
          (string-copy string)
          string)))
-
-(define (normalize-attribute-value value p)
-  (with-string-output-port
-    (lambda (port)
-      (let normalize-value ((value value))
-       (if (string? value)
-           (let ((buffer (string->parser-buffer value)))
-             (let loop ()
-               (let ((char (peek-parser-buffer-char buffer)))
-                 (cond ((not char)
-                        unspecific)
-                       ((or (char=? char #\tab)
-                            (char=? char #\newline))
-                        (write-char #\space port)
-                        (read-parser-buffer-char buffer)
-                        (loop))
-                       ((char=? char #\&)
-                        (normalize-value
-                         (vector-ref (parse-reference buffer)
-                                     0))
-                        (loop))
-                       (else
-                        (write-char char port)
-                        (read-parser-buffer-char buffer)
-                        (loop))))))
-           (perror p "Reference to external entity in attribute"))))))
 \f
-;;;; Document-type declarations
-
-(define parse-dtd                      ;[28]
-  (*parser
-   (top-level
-    (encapsulate
-       (lambda (v)
-         (make-xml-dtd (vector-ref v 0)
-                       (vector-ref v 1)
-                       (vector-ref v 2)))
-      (sbracket "document-type declaration" "<!DOCTYPE" ">"
-       (require-success "Malformed document type"
-         (seq S
-              parse-name
-              (map (lambda (external)
-                     (if external (set! *internal-dtd?* #f))
-                     external)
-                   (alt (seq S parse-external-id)
-                        (values #f)))
-              S?
-              (alt (seq (sbracket "internal DTD" "[" "]"
-                          parse-internal-subset)
-                        S?)
-                   (values '())))))))))
-
-(define (parse-internal-subset buffer)
-  (fluid-let ((*parameter-entities* '()))
-    (let loop ((elements '#()))
-      (let ((v
-            (or (parse-internal-markup-decl buffer)
-                (parse-decl-separator buffer))))
-       (if v
-           (loop (vector-append elements v))
-           (vector (vector->list elements)))))))
+;;;; Parameter entities
 
-(define parse-decl-separator           ;[28a]
-  (*parser
-   (alt (with-pointer p
-         (map (lambda (value)
-                (parse-coalesced-element parse-external-subset-decl
-                                         (list " " value " ")
-                                         "parameter-entity value"
-                                         p))
-              parse-parameter-entity-reference))
-       S)))
-
-(define parse-internal-markup-decl     ;[29]
-  (*parser
-   (alt parse-!element
-       parse-!attlist
-       parse-!entity
-       parse-!notation
-       parse-processing-instructions
-       parse-comment)))
-\f
 (define (make-parameter-entity name value)
   (let ((entity (make-xml-parameter-!entity name value)))
     (if (and (not (eq? *parameter-entities* 'STOP))
             (loop (cdr entities))))))
 
 (define *parameter-entities*)
-
-(define parse-external-subset-decl     ;[31]
-  (*parser
-   (* (alt parse-external-markup-decl
-          parse-conditional-section
-          parse-decl-separator))))
 \f
+;;;; General parsed entities
+
 (define (dereference-entity name p)
   (if (eq? *general-entities* 'STOP)
       (uninterpreted-entity name)
       (perror p "Illegal external reference in standalone document"))
   (make-xml-external-id id uri))
 \f
+;;;; Document-type declarations
+
+(define parse-dtd                      ;[28]
+  (*parser
+   (top-level
+    (encapsulate
+       (lambda (v)
+         (make-xml-dtd (vector-ref v 0)
+                       (vector-ref v 1)
+                       (vector-ref v 2)))
+      (sbracket "document-type declaration" "<!DOCTYPE" ">"
+       (require-success "Malformed document type"
+         (seq S
+              parse-name
+              (map (lambda (external)
+                     (if external (set! *internal-dtd?* #f))
+                     external)
+                   (alt (seq S parse-external-id)
+                        (values #f)))
+              S?
+              (alt (seq (sbracket "internal DTD" "[" "]"
+                          parse-internal-subset)
+                        S?)
+                   (values '())))))))))
+
+(define (parse-internal-subset buffer)
+  (fluid-let ((*parameter-entities* '()))
+    (let loop ((elements '#()))
+      (let ((v
+            (or (parse-internal-markup-decl buffer)
+                (parse-decl-separator buffer))))
+       (if v
+           (loop (vector-append elements v))
+           (vector (vector->list elements)))))))
+
+(define parse-decl-separator           ;[28a]
+  (*parser
+   (alt (with-pointer p
+         (map (lambda (value)
+                (parse-coalesced-element parse-external-subset-decl
+                                         (list " " value " ")
+                                         "parameter-entity value"
+                                         p))
+              parse-parameter-entity-reference))
+       S)))
+
+(define parse-internal-markup-decl     ;[29]
+  (*parser
+   (alt parse-!element
+       parse-!attlist
+       parse-!entity
+       parse-!notation
+       parse-processing-instructions
+       parse-comment)))
+\f
 (define parse-!element                 ;[45]
   (letrec
       ((parse-children                 ;[47,49,50]
    (char-set-union char-set:alphanumeric
                   (string->char-set " \r\n-'()+,./:=?;!*#@$_%"))))
 \f
+;;;; External subset
+
+;; This is hairy because parameter-entity references can appear almost
+;; anywhere within DTD declarations, when the declarations appear in
+;; the external subset.  Rather than make the declaration parsing
+;; rules overly complex, we do a pre-pass to expand references in the
+;; interior of each declaration, and then reparse the expanded text.
+
+(define parse-external-subset-decl     ;[31]
+  (*parser
+   (* (alt parse-external-markup-decl
+          parse-conditional-section
+          parse-decl-separator))))
+
 (define external-decl-parser
   (let ((a1 (char-set-difference char-set:xml-char (char-set #\% #\" #\' #\>)))
        (a2 (char-set-difference char-set:xml-char (char-set #\")))
          parse-!entity
          parse-!notation))))
 \f
+;;;; Conditional sections
+
 (define parse-conditional-section      ;[61]
   (*parser
    (alt parse-!ignore-section