Initial revision.
authorChris Hanson <org/chris-hanson/cph>
Thu, 5 Jul 2001 20:38:42 +0000 (20:38 +0000)
committerChris Hanson <org/chris-hanson/cph>
Thu, 5 Jul 2001 20:38:42 +0000 (20:38 +0000)
v7/src/xml/xml-parser.scm [new file with mode: 0644]

diff --git a/v7/src/xml/xml-parser.scm b/v7/src/xml/xml-parser.scm
new file mode 100644 (file)
index 0000000..2d0a14f
--- /dev/null
@@ -0,0 +1,945 @@
+;;; -*-Scheme-*-
+;;;
+;;; $Id: xml-parser.scm,v 1.1 2001/07/05 20:38:42 cph Exp $
+;;;
+;;; Copyright (c) 2001 Massachusetts Institute of Technology
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU General Public License as
+;;; published by the Free Software Foundation; either version 2 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+;;; 02111-1307, USA.
+
+;;;; XML parser
+
+;;; Comments of the form [N] refer to production rules in the XML 1.0
+;;; standard, second edition, 6 October 2000.  Each such comment marks
+;;; the code that corresponds to that rule.
+
+;;; **** TO DO ****
+;;; * Attribute-value normalization (p. 29).
+
+(declare (usual-integrations))
+\f
+;;;; Structures
+
+(define-structure xml-document
+  declaration
+  misc-1
+  dtd
+  misc-2
+  root
+  misc-3)
+
+(define-structure xml-declaration
+  version
+  encoding
+  standalone?)
+
+(define-structure (xml-element
+                  (print-procedure
+                   (standard-unparser-method 'XML-ELEMENT
+                     (lambda (element port)
+                       (write-char #\space port)
+                       (write (xml-element-name element) port)))))
+  name
+  attributes
+  contents)
+
+(define-structure (xml-processing-instructions
+                  (print-procedure
+                   (standard-unparser-method 'XML-PROCESSING-INSTRUCTIONS
+                     (lambda (element port)
+                       (write-char #\space port)
+                       (write (xml-processing-instructions-name element)
+                              port)))))
+  name
+  text)
+
+(define-structure xml-comment
+  text)
+
+(define-structure (xml-entity-reference
+                  (print-procedure
+                   (standard-unparser-method 'XML-ENTITY-REFERENCE
+                     (lambda (reference port)
+                       (write-char #\space port)
+                       (write (xml-entity-reference-name reference) port)))))
+  name)
+
+(define-structure (xml-parameter-entity-reference
+                  (print-procedure
+                   (standard-unparser-method 'XML-PARAMETER-ENTITY-REFERENCE
+                     (lambda (reference port)
+                       (write-char #\space port)
+                       (write (xml-parameter-entity-reference-name reference)
+                              port)))))
+  name)
+\f
+(define-structure (xml-dtd
+                  (print-procedure
+                   (standard-unparser-method 'XML-DTD
+                     (lambda (dtd port)
+                       (write-char #\space port)
+                       (write (xml-dtd-root dtd) port)))))
+  root
+  external
+  internal)
+
+(define-structure (xml-external-id
+                  (print-procedure
+                   (standard-unparser-method 'XML-EXTERNAL-ID
+                     (lambda (dtd port)
+                       (write-char #\space port)
+                       (write (or (xml-external-id-id dtd)
+                                  (xml-external-id-uri dtd))
+                              port)))))
+  id
+  uri)
+
+(define-structure (xml-element-declaration
+                  (print-procedure
+                   (standard-unparser-method 'XML-ELEMENT-DECLARATION
+                     (lambda (element port)
+                       (write-char #\space port)
+                       (write (xml-element-declaration-name element) port)))))
+  name
+  content-type)
+
+(define-structure (xml-attribute-declaration
+                  (print-procedure
+                   (standard-unparser-method 'XML-ATTRIBUTE-DECLARATION
+                     (lambda (element port)
+                       (write-char #\space port)
+                       (write (xml-attribute-declaration-name element)
+                              port)))))
+  name
+  definitions)
+
+(define-structure xml-include-section
+  contents)
+
+(define-structure xml-ignore-section
+  contents)
+
+(define-structure (xml-entity-declaration
+                  (print-procedure
+                   (standard-unparser-method 'XML-ENTITY-DECLARATION
+                     (lambda (element port)
+                       (write-char #\space port)
+                       (write (xml-entity-declaration-name element) port)))))
+  name
+  value)
+
+(define-structure (xml-parameter-entity-declaration
+                  (print-procedure
+                   (standard-unparser-method 'XML-PARAMETER-ENTITY-DECLARATION
+                     (lambda (element port)
+                       (write-char #\space port)
+                       (write (xml-parameter-entity-declaration-name element)
+                              port)))))
+  name
+  value)
+
+(define-structure (xml-notation-declaration
+                  (print-procedure
+                   (standard-unparser-method 'XML-NOTATION-DECLARATION
+                     (lambda (element port)
+                       (write-char #\space port)
+                       (write (xml-notation-declaration-name element)
+                              port)))))
+  name
+  value)
+\f
+;;;; Utilities
+
+(define char-set:xml-char              ;[2], loose UTF-8
+  ;; The upper range of this alphabet would normally be #xFE, but XML
+  ;; doesn't use any characters larger than #x10FFFF, so the largest
+  ;; byte that can be seen is #xF4.
+  (char-set-union (char-set #\tab #\linefeed #\return)
+                 (ascii-range->char-set #x20 #xF5)))
+
+(define char-set:char-data
+  (char-set-difference char-set:xml-char (char-set #\< #\&)))
+
+(define-*parser-macro S                        ;[3]
+  `(NOISE (+ (ALPHABET CHAR-SET:XML-WHITESPACE))))
+
+(define-*parser-macro S?
+  `(NOISE (* (ALPHABET CHAR-SET:XML-WHITESPACE))))
+
+(define char-set:xml-whitespace
+  (char-set #\space #\tab #\return #\linefeed))
+
+(define (xml-intern string)
+  ;; Prevents XML names from cluttering the symbol table.
+  (or (hash-table/get xml-tokens string #f)
+      (let ((symbol (string->uninterned-symbol string)))
+       (hash-table/put! xml-tokens string symbol)
+       symbol)))
+
+(define xml-tokens
+  (make-string-hash-table))
+
+(define-*parser-macro (bracket description open close . body)
+  (let ((v (generate-uninterned-symbol)))
+    `(WITH-POINTER ,v
+       (SEQ ,open
+           ,@body
+           (ALT ,close
+                (SEXP
+                 (LAMBDA (BUFFER)
+                   BUFFER
+                   (ERROR
+                    ,(if (string? description)
+                         (string-append "Unterminated " description " at")
+                         `(STRING-APPEND "Unterminated " ,description " at"))
+                    (PARSER-BUFFER-POSITION-STRING ,v)))))))))
+
+(define-*parser-macro (sbracket description open close . body)
+  `(BRACKET ,description (NOISE (STRING ,open)) (NOISE (STRING ,close))
+     ,@body))
+
+(define-*parser-macro (require-success message body)
+  `(ALT ,body
+       (SEXP
+        (LAMBDA (BUFFER)
+          (ERROR ,(if (string? message)
+                      (string-append message " at")
+                      `(STRING-APPEND ,message " at"))
+                 (PARSER-BUFFER-POSITION-STRING BUFFER))))))
+\f
+(define (make-xml-char-reference n)
+  (if (not (or (= n #x9)
+              (= n #xA)
+              (= n #xD)
+              (<= #x20 n #xD7FF)
+              (<= #xE000 n #xFFFD)
+              (<= #x10000 n #x10FFFF)))
+      (error "Disallowed Unicode character code:" n))
+  (integer->unicode-string n))
+
+(define (integer->unicode-string n)
+  (let ((initial-char
+        (lambda (n offset)
+          (integer->char
+           (fix:or (fix:and (fix:lsh #xFF (fix:+ n 1)) #xFF)
+                   (fix:lsh n -6)))))
+       (subsequent-char
+        (lambda (offset)
+          (integer->char
+           (fix:or #x80
+                   (fix:and (fix:lsh n (fix:- 0 offset))
+                            #x3F))))))
+    (declare (integrate-operator initial-char subsequent-char))
+    (if (not (and (<= 0 n) (< n #x80000000)))
+       (error:bad-range-argument n 'INTEGER->UNICODE-STRING))
+    (cond ((< n #x00000080)
+          (string (integer->char n)))
+         ((< n #x00000800)
+          (string (initial-char 5 6)
+                  (subsequent-char 6)))
+         ((< n #x00010000)
+          (string (initial-char 4 12)
+                  (subsequent-char 12)
+                  (subsequent-char 6)))
+         ((< n #x00200000)
+          (string (initial-char 3 18)
+                  (subsequent-char 18)
+                  (subsequent-char 12)
+                  (subsequent-char 6)))
+         ((< n #x04000000)
+          (string (initial-char 2 24)
+                  (subsequent-char 24)
+                  (subsequent-char 18)
+                  (subsequent-char 12)
+                  (subsequent-char 6)))
+         (else
+          (string (initial-char 1 30)
+                  (subsequent-char 30)
+                  (subsequent-char 24)
+                  (subsequent-char 18)
+                  (subsequent-char 12)
+                  (subsequent-char 6))))))
+\f
+(define (normalize-line-endings string)
+  (if (string-find-next-char string #\return)
+      (let ((end (string-length string)))
+       (let ((step-over-eol
+              (lambda (index)
+                (fix:+ index
+                       (if (and (fix:< (fix:+ index 1) end)
+                                (char=? (string-ref string (fix:+ index 1))
+                                        #\linefeed))
+                           2
+                           1)))))
+         (let ((n
+                (let loop ((start 0) (n 0))
+                  (let ((index
+                         (substring-find-next-char string start end
+                                                   #\return)))
+                    (if index
+                        (loop (step-over-eol index)
+                              (fix:+ n (fix:+ (fix:- index start) 1)))
+                        (fix:+ n (fix:- end start)))))))
+           (let ((string* (make-string n)))
+             (let loop ((start 0) (start* 0))
+               (let ((index
+                      (substring-find-next-char string start end
+                                                #\return)))
+                 (if index
+                     (let ((start*
+                            (substring-move! string start index
+                                             string* start*)))
+                       (string-set! string* start* #\newline)
+                       (loop (step-over-eol index)
+                             (fix:+ start* 1)))
+                     (substring-move! string start end string* start*))))
+             string*))))
+      string))
+\f
+;;;; Top level
+
+(define parse-xml-document             ;[1,22]
+  (*parser
+   (transform
+       (lambda (v)
+        (make-xml-document (vector-ref v 0)
+                           (vector-ref v 1)
+                           (vector-ref v 2)
+                           (vector-ref v 3)
+                           (vector-ref v 4)
+                           (vector-ref v 5)))
+     (complete
+      (seq (alt (top-level parse-declaration)
+               (values #f))
+          (encapsulate vector->list
+            (* (top-level parse-misc)))
+          (alt (seq (top-level parse-dtd)
+                    (encapsulate vector->list
+                      (* (top-level parse-misc))))
+               (values #f '()))
+          (top-level parse-element)
+          (encapsulate vector->list
+            (* (top-level parse-misc))))))))
+
+(define parse-misc                     ;[27]
+  (*parser
+   (alt parse-comment
+       parse-processing-instructions
+       (element-transform normalize-line-endings
+         (match (+ (alphabet char-set:xml-whitespace)))))))
+\f
+(define parse-declaration              ;[23,24,32,80]
+  (*parser
+   (encapsulate (lambda (v) (transform-declaration (vector-ref v 0)))
+     (sbracket "XML declaration" "<?xml" "?>"
+       parse-attribute-list))))
+
+(define (transform-declaration attributes)
+  (let ((finish
+        (lambda (version encoding standalone)
+          (if (not (and (string? version)
+                        (match-xml-version (string->parser-buffer version))))
+              (error "Malformed XML version:" version))
+          (if (and encoding
+                   (not (and (string? encoding)
+                             (match-encoding
+                              (string->parser-buffer encoding)))))
+              (error "Malformed encoding attribute:" encoding))
+          (if (and standalone
+                   (not (member standalone '("yes" "no"))))
+              (error "Malformed standalone attribute:" standalone))
+          (make-xml-declaration version
+                                encoding
+                                (equal? standalone "yes")))))
+    (let loop
+       ((attributes attributes)
+        (names '("version" "encoding" "standalone"))
+        (results '()))
+      (if (pair? names)
+         (if (pair? attributes)
+             (if (string=? (symbol-name (caar attributes)) (car names))
+                 (loop (cdr attributes)
+                       (cdr names)
+                       (cons (cdar attributes) results))
+                 (loop attributes
+                       (cdr names)
+                       (cons #f results)))
+             (let loop ((names names) (results results))
+               (if (pair? names)
+                   (loop (cdr names) (cons #f results))
+                   (finish (caddr results) (cadr results) (car results)))))
+         (begin
+           (if (pair? attributes)
+               (error "Extra attributes in XML declaration:" attributes))
+           (finish (caddr results) (cadr results) (car results)))))))
+
+(define match-xml-version              ;[26]
+  (let ((a (char-set-union char-set:alphanumeric (string->char-set "_.:-"))))
+    (*matcher (+ (alphabet a)))))
+
+(define match-encoding                 ;[81]
+  (let ((a (char-set-union char-set:alphanumeric (string->char-set "_.-"))))
+    (*matcher
+     (seq (alphabet char-set:alphabetic)
+         (* (alphabet a))))))
+\f
+;;;; Elements
+
+(define (parse-element buffer)         ;[39]
+  (let ((p (get-parser-buffer-pointer buffer)))
+    (let ((v (parse-start-tag buffer)))
+      (and v
+          (vector
+           (make-xml-element
+            (vector-ref v 0)
+            (vector-ref v 1)
+            (if (string=? (vector-ref v 2) ">")
+                (let loop ((elements '#()))
+                  (let ((v* (parse-end-tag buffer)))
+                    (if v*
+                        (begin
+                          (if (not (eq? (vector-ref v 0) (vector-ref v* 0)))
+                              (error "Mismatched start tag at"
+                                     (parser-buffer-position-string p)))
+                          (vector->list elements))
+                        (let ((v* (parse-content buffer)))
+                          (if (not v*)
+                              (error "Unterminated start tag at"
+                                     (parser-buffer-position-string p)))
+                          (if (equal? v* '#(""))
+                              (error "Unknown content at"
+                                     (parser-buffer-position-string buffer)))
+                          (loop (vector-append elements v*))))))
+                '())))))))
+
+(define parse-start-tag                        ;[40,44]
+  (*parser
+   (bracket "start tag"
+       (seq (noise (string "<")) maybe-parse-name)
+       (match (alt (string ">") (string "/>")))
+     parse-attribute-list)))
+
+(define parse-end-tag                  ;[42]
+  (*parser
+   (sbracket "end tag" "</" ">"
+     parse-name
+     S?)))
+
+(define parse-content                  ;[43]
+  (*parser
+   (transform
+       (lambda (v)
+        (let ((elements (vector->list v)))
+          (coalesce-strings! elements)
+          (list->vector elements)))
+     (seq parse-char-data
+         (* (seq (alt parse-element
+                      parse-reference
+                      parse-cdata-section
+                      parse-processing-instructions
+                      parse-comment)
+                 parse-char-data))))))
+\f
+;;;; Other markup
+
+(define (bracketed-region-parser description start end)
+  (let ((parser
+        (terminated-region-parser description char-set:xml-char end)))
+    (*parser (sbracket description start end parser))))
+
+(define (terminated-region-parser description alphabet . ends)
+  (let ((matcher
+        (lambda (buffer)
+          (let loop ()
+            (if (and (not (there-exists? ends
+                            (lambda (end)
+                              (match-parser-buffer-string-no-advance buffer
+                                                                     end))))
+                     (match-parser-buffer-char-in-set buffer alphabet))
+                (loop)
+                #t)))))
+    (*parser (element-transform normalize-line-endings (match matcher)))))
+
+(define parse-char-data                        ;[14]
+  (terminated-region-parser "character data" char-set:char-data "]]>"))
+
+(define parse-comment                  ;[15]
+  (let ((parser (bracketed-region-parser "comment" "<!--" "-->")))
+    (*parser (element-transform make-xml-comment parser))))
+
+(define parse-cdata-section            ;[18,19,20,21]
+  (bracketed-region-parser "CDATA section" "<![CDATA[" "]]>"))
+
+(define parse-processing-instructions  ;[16,17]
+  (let ((description "processing instructions")
+       (start "<?")
+       (end "?>"))
+    (let ((parse-body
+          (terminated-region-parser description char-set:xml-char end)))
+      (*parser
+       (encapsulate
+          (lambda (v)
+            (make-xml-processing-instructions (vector-ref v 0)
+                                              (vector-ref v 1)))
+        (sbracket description start end
+          (with-pointer ns
+            (transform
+                (lambda (v)
+                  (if (string-ci=? (symbol-name (vector-ref v 0)) "xml")
+                      (error "Illegal PI name at"
+                             (parser-buffer-position-string ns)))
+                  v)
+              parse-name))
+          parse-body))))))
+\f
+;;;; Names and references
+
+(define parse-name
+  (*parser (require-success "malformed XML name" maybe-parse-name)))
+
+(define maybe-parse-name               ;[5]
+  (*parser
+   (element-transform xml-intern
+     (match (seq (alphabet char-set:name-initial)
+                (* (alphabet char-set:name-subsequent)))))))
+
+(define parse-name-token
+  (*parser
+   (require-success "malformed XML name token"
+     maybe-parse-name-token)))
+
+(define maybe-parse-name-token         ;[7]
+  (*parser
+   (element-transform xml-intern
+     (match (+ (alphabet char-set:name-subsequent))))))
+
+(define char-set:name-initial
+  (char-set-union char-set:alphabetic
+                 (string->char-set "_:")
+                 (ascii-range->char-set #x80 #xF5)))
+
+(define char-set:name-subsequent       ;[4], loose UTF-8
+  (char-set-union char-set:alphanumeric
+                 (string->char-set ".-_:")
+                 (ascii-range->char-set #x80 #xF5)))
+
+(define parse-reference                        ;[66,67,68]
+  (let ((predefined
+        (list (cons (xml-intern "lt") "<")
+              (cons (xml-intern "gt") ">")
+              (cons (xml-intern "amp") "&")
+              (cons (xml-intern "quot") "\"")
+              (cons (xml-intern "apos") "'"))))
+    (*parser
+     (sbracket "reference" "&" ";"
+       (alt (seq (noise (string "#"))
+                (alt (element-transform
+                         (lambda (s)
+                           (make-xml-char-reference (string->number s 10)))
+                       (match (+ (alphabet char-set:numeric))))
+                     (element-transform
+                         (lambda (s)
+                           (make-xml-char-reference (string->number s 16)))
+                       (seq (noise (string "x"))
+                            (match (+ (alphabet "0-9a-fA-f")))))))
+           (element-transform
+               (lambda (name)
+                 (let ((entry (assq name predefined)))
+                   (if entry
+                       (cdr entry)
+                       (make-xml-entity-reference name))))
+             parse-name))))))
+
+(define parse-parameter-entity-reference ;[69]
+  (*parser
+   (element-transform make-xml-parameter-entity-reference
+     (sbracket "parameter-entity reference" "%" ";"
+       parse-name))))
+\f
+;;;; Attributes
+
+(define parse-attribute-list
+  (*parser
+   (encapsulate vector->list
+     (seq (* parse-attribute)
+         S?))))
+
+(define parse-attribute                        ;[41,25]
+  (*parser
+   (encapsulate (lambda (v) (cons (vector-ref v 0) (vector-ref v 1)))
+     (seq S
+         maybe-parse-name
+         S?
+         (require-success "missing attribute separator"
+           (noise (string "=")))
+         S?
+         parse-attribute-value))))
+
+(define (attribute-value-parser alphabet parse-reference)
+  (let ((a1 (char-set-difference alphabet (char-set #\")))
+       (a2 (char-set-difference alphabet (char-set #\'))))
+    (*parser
+     (encapsulate
+        (lambda (v)
+          (let ((elements (vector->list v)))
+            (coalesce-strings! elements)
+            (if (and (pair? elements)
+                     (null? (cdr elements)))
+                (car elements)
+                elements)))
+       (alt (sbracket "attribute value" "\"" "\""
+             (* (alt (match (+ (alphabet a1)))
+                     parse-reference)))
+           (sbracket "attribute value" "'" "'"
+             (* (alt (match (+ (alphabet a2)))
+                     parse-reference))))))))
+
+(define parse-entity-value             ;[9]
+  (attribute-value-parser
+   (char-set-difference char-set:xml-char (char-set #\% #\&))
+   (*parser
+    (alt parse-reference
+        parse-parameter-entity-reference))))
+
+(define parse-attribute-value          ;[10]
+  (let ((parser (attribute-value-parser char-set:char-data parse-reference)))
+    (*parser (require-success "malformed attribute value" parser))))
+
+(define (coalesce-strings! elements)
+  (do ((elements elements (cdr elements)))
+      ((not (pair? elements)))
+    (if (and (string? (car elements))
+            (pair? (cdr elements))
+            (string? (cadr elements)))
+       (begin
+         (set-car! elements
+                   (string-append (car elements)
+                                  (cadr elements)))
+         (set-cdr! elements (cddr elements))))))
+\f
+;;;; Document-type declarations
+
+(define parse-dtd                      ;[28]
+  (*parser
+   (encapsulate
+       (lambda (v)
+        (make-xml-dtd (vector-ref v 0)
+                      (vector-ref v 1)
+                      (vector-ref v 2)))
+     (sbracket "document-type declaration" "<!DOCTYPE" ">"
+       (require-success "Malformed document type"
+        (seq S
+             parse-name
+             (alt (seq S
+                       parse-external-id)
+                  (values #f))
+             S?
+             (alt (seq (encapsulate vector->list
+                         (sbracket "internal DTD" "[" "]"
+                           (* (alt parse-markup-decl
+                                   parse-decl-separator))))
+                       S?)
+                  (values #f))))))))
+
+(define parse-markup-decl              ;[29]
+  (*parser
+   (alt parse-element-decl
+       parse-attlist-decl
+       parse-entity-decl
+       parse-notation-decl
+       parse-processing-instructions
+       parse-comment)))
+
+(define parse-decl-separator           ;[28a]
+  (*parser
+   (alt parse-parameter-entity-reference
+       S)))
+
+(define parse-external-subset          ;[30]
+  (*parser
+   (seq (? parse-text-decl)
+       parse-external-subset-decl)))
+
+(define parse-external-subset-decl     ;[31]
+  (*parser
+   (* (alt parse-markup-decl
+          parse-conditional-section
+          parse-decl-separator))))
+\f
+(define parse-element-decl             ;[45]
+  (letrec
+      ((parse-children                 ;[47,49,50]
+       (*parser
+        (encapsulate encapsulate-suffix
+          (seq (sbracket "element-declaration type" "(" ")"
+                 S?
+                 (alt (encapsulate (lambda (v) (cons 'ALT (vector->list v)))
+                        (seq parse-cp
+                             (+ (seq S?
+                                     (noise (string "|"))
+                                     S?
+                                     parse-cp))))
+                      (encapsulate (lambda (v) (cons 'SEQ (vector->list v)))
+                        (seq parse-cp
+                             (* (seq S?
+                                     (noise (string ","))
+                                     S?
+                                     parse-cp)))))
+                 S?)
+               (? (match (alphabet "?*+")))))))
+
+       (parse-cp                       ;[48]
+        (*parser
+         (alt (encapsulate encapsulate-suffix
+                (seq maybe-parse-name
+                     (? (match (alphabet "?*+")))))
+              parse-children)))
+
+       (encapsulate-suffix
+       (lambda (v)
+         (if (fix:= (vector-length v) 1)
+             (vector-ref v 0)
+             (list (xml-intern (vector-ref v 1))
+                   (vector-ref v 0))))))
+
+    (*parser
+     (encapsulate
+        (lambda (v)
+          (make-xml-element-declaration (vector-ref v 0)
+                                        (vector-ref v 1)))
+       (sbracket "element declaration" "<!ELEMENT" ">"
+        S
+        parse-name
+        S
+        ;;[46]
+        (alt (element-transform xml-intern (match (string "EMPTY")))
+             (element-transform xml-intern (match (string "ANY")))
+             ;;[51]
+             (encapsulate (lambda (v) (cons 'MIX (vector->list v)))
+               (alt (sbracket "element-declaration type" "(" ")"
+                      S?
+                      (noise (string "#PCDATA"))
+                      S?)
+                    (sbracket "element-declaration type" "(" ")*"
+                      S?
+                      (noise (string "#PCDATA"))
+                      (* (seq S?
+                              (noise (string "|"))
+                              S?
+                              parse-name))
+                      S?)))
+             parse-children))))))
+\f
+(define parse-attlist-decl             ;[52,53]
+  (*parser
+   (encapsulate
+       (lambda (v)
+        (make-xml-attribute-declaration (vector-ref v 0)
+                                        (vector-ref v 1)))
+     (sbracket "attribute-list declaration" "<!ATTLIST" ">"
+       S
+       parse-name
+       (encapsulate vector->list
+        (* (encapsulate vector->list
+             (seq S
+                  maybe-parse-name
+                  S
+                  ;;[54,57]
+                  (alt (element-transform xml-intern
+                         ;;[55,56]
+                         (alt (match (string "CDATA"))
+                              (match (string "IDREFS"))
+                              (match (string "IDREF"))
+                              (match (string "ID"))
+                              (match (string "ENTITY"))
+                              (match (string "ENTITIES"))
+                              (match (string "NMTOKENS"))
+                              (match (string "NMTOKEN"))))
+                       ;;[58]
+                       (encapsulate
+                           (lambda (v)
+                             (cons 'NOTATION (vector->list v)))
+                         (bracket "notation type"
+                             (seq (noise (string "NOTATION"))
+                                  S
+                                  (noise (string "(")))
+                             (noise (string ")"))
+                           S?
+                           parse-name
+                           (* (seq S?
+                                   (noise (string "|"))
+                                   S?
+                                   parse-name))
+                           S?))
+                       ;;[59]
+                       (encapsulate
+                           (lambda (v)
+                             (cons 'ENUMERATED (vector->list v)))
+                         (sbracket "enumerated type" "(" ")"
+                           S?
+                           parse-name-token
+                           (* (seq S?
+                                   (noise (string "|"))
+                                   S?
+                                   parse-name-token))
+                           S?)))
+                  S
+                  ;;[60]
+                  (alt (element-transform xml-intern
+                         (alt (match (string "#REQUIRED"))
+                              (match (string "#IMPLIED"))))
+                       (encapsulate vector->list
+                         (seq (element-transform xml-intern
+                                (match (string "#FIXED")))
+                              S
+                              parse-attribute-value))
+                       (element-transform (lambda (v) (list 'DEFAULT v))
+                         parse-attribute-value))))))
+       S?))))
+\f
+(define parse-conditional-section      ;[61]
+  (*parser
+   (alt parse-include-section
+       parse-ignore-section)))
+
+(define-integrable conditional-start "<![")
+(define-integrable conditional-end "]]>")
+
+(define parse-include-section          ;[62]
+  (*parser
+   (encapsulate
+       (lambda (v)
+        (make-xml-include-section (vector->list v)))
+     (bracket "include section"
+        (seq (noise (string conditional-start))
+             S?
+             (noise (string "INCLUDE"))
+             S?
+             (noise (string "[")))
+        (noise (string conditional-end))
+       parse-external-subset-decl))))
+
+(define parse-ignore-section           ;[63]
+  (*parser
+   (encapsulate
+       (lambda (v)
+        (make-xml-ignore-section (vector->list v)))
+     (bracket "ignore section"
+        (seq (noise (string conditional-start))
+             S?
+             (noise (string "IGNORE"))
+             S?
+             (noise (string "[")))
+        (noise (string conditional-end))
+       (* parse-ignore-section-contents)))))
+
+(define parse-ignore-section-contents  ;[64,65]
+  (let ((parser
+        (terminated-region-parser "ignore section" char-set:xml-char
+                                  conditional-start conditional-end)))
+    (*parser
+     (seq parser
+         (* (seq (sbracket "ignore section" conditional-start conditional-end
+                   parse-ignore-section-contents)
+                 parser))))))
+\f
+(define parse-entity-decl              ;[70,71,72,73,74]
+  (*parser
+   (sbracket "entity declaration" "<!ENTITY" ">"
+     (seq S
+         (alt (encapsulate
+                  (lambda (v)
+                    (make-xml-parameter-entity-declaration (vector-ref v 0)
+                                                           (vector-ref v 1)))
+                (seq (noise (string "%"))
+                     S
+                     parse-name
+                     S
+                     (alt parse-entity-value
+                          parse-external-id)))
+              (encapsulate
+                  (lambda (v)
+                    (make-xml-entity-declaration (vector-ref v 0)
+                                                 (vector-ref v 1)))
+                (seq parse-name
+                     S
+                     (alt parse-entity-value
+                          (seq parse-external-id
+                               (? parse-ndata-decl))))))
+         S?))))
+
+(define parse-external-id              ;[75]
+  (*parser
+   (alt (encapsulate
+           (lambda (v)
+             (make-xml-external-id #f (vector-ref v 0)))
+         (seq (noise (string "SYSTEM"))
+              S
+              parse-system-literal))
+       (encapsulate
+           (lambda (v)
+             (make-xml-external-id (vector-ref v 0) (vector-ref v 1)))
+         (seq (noise (string "PUBLIC"))
+              S
+              parse-public-id-literal
+              S
+              parse-system-literal)))))
+
+(define (string-parser description alphabet)
+  (let ((a1 (char-set-difference alphabet (char-set #\")))
+       (a2 (char-set-difference alphabet (char-set #\'))))
+    (*parser
+     (alt (sbracket description "\"" "\"" (match (* (alphabet a1))))
+         (sbracket description "'" "'" (match (* (alphabet a2))))))))
+
+(define parse-public-id-literal                ;[12,13]
+  (string-parser
+   "public-ID literal"
+   (char-set-union char-set:alphanumeric
+                  (string->char-set " \r\n-'()+,./:=?;!*#@$_%"))))
+
+(define parse-system-literal           ;[11]
+  (string-parser "system literal" char-set:xml-char))
+\f
+(define parse-ndata-decl               ;[76]
+  (*parser
+   (seq S
+       (noise (string "NDATA"))
+       S
+       parse-name)))
+
+(define parse-notation-decl            ;[82,83]
+  (*parser
+   (sbracket "notation declaration" "<!NOTATION" ">"
+     S
+     parse-name
+     S
+     (alt parse-external-id
+         (encapsulate
+             (lambda (v)
+               (make-xml-external-id (vector-ref v 0) #f))
+           (seq (noise (string "PUBLIC"))
+                S
+                parse-public-id-literal)))
+     S?)))
+
+;;; Edwin Variables:
+;;; Eval: (scheme-indent-method 'encapsulate 1)
+;;; Eval: (scheme-indent-method 'transform 1)
+;;; Eval: (scheme-indent-method 'element-transform 1)
+;;; Eval: (scheme-indent-method 'require-success 1)
+;;; Eval: (scheme-indent-method 'bracket 3)
+;;; Eval: (scheme-indent-method 'sbracket 3)
+;;; End: