From 7587926db495ab63d05128b443689dc851952b41 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Wed, 17 Jan 2007 03:43:09 +0000 Subject: [PATCH] Use new matching/parsing utility procedures. Implement UTF8-STRING->XML. Fix turtle URI output so that qnames conform to turtle syntax. --- v7/src/xml/rdf-struct.scm | 41 ++++++++++++------------- v7/src/xml/turtle.scm | 18 +++++++---- v7/src/xml/xml-names.scm | 4 +-- v7/src/xml/xml-parser.scm | 63 +++++++++++++++++++-------------------- v7/src/xml/xml.pkg | 3 +- 5 files changed, 66 insertions(+), 63 deletions(-) diff --git a/v7/src/xml/rdf-struct.scm b/v7/src/xml/rdf-struct.scm index 5e2791a64..c644ac585 100644 --- a/v7/src/xml/rdf-struct.scm +++ b/v7/src/xml/rdf-struct.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: rdf-struct.scm,v 1.25 2007/01/16 21:16:46 cph Exp $ +$Id: rdf-struct.scm,v 1.26 2007/01/17 03:42:52 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -86,16 +86,12 @@ USA. (string-append "B" (number->string (hash bnode)))) (define (%decode-bnode-uri uri) - (let ((handle-uri - (lambda (uri) - (let ((v - (and (string? uri) - (parse-bnode (string->parser-buffer uri))))) - (and v - (unhash (vector-ref v 0))))))) - (cond ((string? uri) (handle-uri uri)) - ((symbol? uri) (handle-uri (symbol-name uri))) - (else #f)))) + (let ((v + (cond ((string? uri) (*parse-string parse-bnode uri)) + ((symbol? uri) (*parse-symbol parse-bnode uri)) + (else #f)))) + (and v + (unhash (vector-ref v 0))))) (define parse-bnode (let ((digits (ascii-range->char-set #x30 #x3A))) @@ -141,16 +137,10 @@ USA. (%make-rdf-literal text (if (or (not type) (and (interned-symbol? type) - (complete-match match-language - (symbol-name type)))) + (*match-symbol match-language type))) type (->absolute-uri type 'RDF-LITERAL)))) -(define (complete-match matcher string) - (let ((buffer (string->parser-buffer string))) - (and (matcher buffer) - (not (peek-parser-buffer-char buffer))))) - (define match-language (let* ((language-head (ascii-range->char-set #x61 #x7B)) (language-tail @@ -279,8 +269,8 @@ USA. (define (make-rdf-qname prefix local) (guarantee-rdf-prefix prefix 'MAKE-RDF-QNAME) - (guarantee-string local 'MAKE-RDF-QNAME) - (if (not (complete-match match:name local)) + (guarantee-utf8-string local 'MAKE-RDF-QNAME) + (if (not (*match-utf8-string match:name local)) (error:bad-range-argument local 'MAKE-RDF-QNAME)) (symbol prefix local)) @@ -303,13 +293,20 @@ USA. (define (rdf-qname? object) (and (interned-symbol? object) - (match-prefix (string->parser-buffer (symbol-name object))))) + (*match-symbol match-qname object))) (define-guarantee rdf-qname "RDF QName") +(define match-qname + (*matcher (seq match-prefix match-tail))) + +(define (match-tail buffer) + (and (read-parser-buffer-char buffer) + (match-tail buffer))) + (define (rdf-prefix? object) (and (interned-symbol? object) - (complete-match match-prefix (symbol-name object)))) + (*match-symbol match-prefix object))) (define-guarantee rdf-prefix "RDF prefix") diff --git a/v7/src/xml/turtle.scm b/v7/src/xml/turtle.scm index ad11edb0e..ece01c8f3 100644 --- a/v7/src/xml/turtle.scm +++ b/v7/src/xml/turtle.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: turtle.scm,v 1.19 2007/01/05 21:19:29 cph Exp $ +$Id: turtle.scm,v 1.20 2007/01/17 03:42:56 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -770,10 +770,18 @@ USA. (write-rdf/nt-literal-text text port))) (define (write-rdf/turtle-uri uri port) - (let ((qname (uri->rdf-qname uri (port/rdf-prefix-registry port) #f))) - (if qname - (write-string (symbol-name qname) port) - (write-rdf/nt-uri uri port)))) + (let* ((s (uri->string uri)) + (end (string-length s))) + (receive (prefix expansion) + (uri->rdf-prefix uri (port/rdf-prefix-registry port) #f) + (if prefix + (let ((start (string-length expansion))) + (if (*match-string match:name s start end) + (begin + (write-string (symbol-name prefix) port) + (write-substring s start end port)) + (write-rdf/nt-uri uri port))) + (write-rdf/nt-uri uri port))))) (define (sort-triples triples) (sort triples diff --git a/v7/src/xml/xml-names.scm b/v7/src/xml/xml-names.scm index 0ced5e8fd..38dc39421 100644 --- a/v7/src/xml/xml-names.scm +++ b/v7/src/xml/xml-names.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml-names.scm,v 1.14 2007/01/05 21:19:29 cph Exp $ +$Id: xml-names.scm,v 1.15 2007/01/17 03:43:04 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -121,7 +121,7 @@ USA. (eq? (string-is-xml-nmtoken? string) 'NAME)) (define (string-is-xml-nmtoken? string) - (let ((buffer (string->parser-buffer (utf8-string->wide-string string)))) + (let ((buffer (utf8-string->parser-buffer string))) (letrec ((match-tail (lambda () diff --git a/v7/src/xml/xml-parser.scm b/v7/src/xml/xml-parser.scm index d3f30f754..c2b457e49 100644 --- a/v7/src/xml/xml-parser.scm +++ b/v7/src/xml/xml-parser.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml-parser.scm,v 1.72 2007/01/05 21:19:29 cph Exp $ +$Id: xml-parser.scm,v 1.73 2007/01/17 03:43:09 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -86,35 +86,34 @@ USA. (let ((coding (determine-coding port))) (parse-xml (input-port->parser-buffer port) coding - (if (default-object? pi-handlers) - '() - (begin - (guarantee-pi-handlers pi-handlers 'READ-XML) - pi-handlers))))) + (guarantee-pi-handlers pi-handlers 'READ-XML)))) (define (string->xml string #!optional start end pi-handlers) - (parse-xml (string->parser-buffer string - (if (default-object? start) #f start) - (if (default-object? end) #f end)) + (parse-xml (string->parser-buffer string start end) (if (string? string) 'ISO-8859-1 'ANY) - (if (default-object? pi-handlers) - '() - (begin - (guarantee-pi-handlers pi-handlers 'STRING->XML) - pi-handlers)))) + (guarantee-pi-handlers pi-handlers 'STRING->XML))) + +(define (utf8-string->xml string #!optional start end pi-handlers) + (parse-xml (utf8-string->parser-buffer string start end) + 'UTF-8 + (guarantee-pi-handlers pi-handlers 'UTF8-STRING->XML))) (define (guarantee-pi-handlers object caller) - (if (not (list-of-type? object - (lambda (entry) - (and (pair? entry) - (symbol? (car entry)) - (pair? (cdr entry)) - (procedure? (cadr entry)) - (procedure-arity-valid? (cadr entry) 1) - (null? (cddr entry)))))) - (error:wrong-type-argument object "handler alist" caller))) + (if (default-object? object) + '() + (begin + (if (not (list-of-type? object + (lambda (entry) + (and (pair? entry) + (symbol? (car entry)) + (pair? (cdr entry)) + (procedure? (cadr entry)) + (procedure-arity-valid? (cadr entry) 1) + (null? (cddr entry)))))) + (error:wrong-type-argument object "handler alist" caller)) + object))) ;;;; Character coding @@ -296,12 +295,12 @@ USA. (if (and (not text-decl?) (not version)) (perror p "Missing XML version")) (if (and version - (not (match-xml-version (string->parser-buffer version)))) + (not (*match-string match-xml-version version))) (perror p "Malformed XML version" version)) (if (and version (not (string=? version "1.0"))) (perror p "Unsupported XML version" version)) (if (not (if encoding - (match-encoding (string->parser-buffer encoding)) + (*match-string match-encoding encoding) (not text-decl?))) (perror p "Malformed encoding attribute" encoding)) (if standalone @@ -339,14 +338,13 @@ USA. (define match-xml-version ;[26] (let ((cs (char-set-union char-set:alphanumeric (string->char-set "_.:-")))) - (*matcher (complete (+ (char-set cs)))))) + (*matcher (+ (char-set cs))))) (define match-encoding ;[81] (let ((cs (char-set-union char-set:alphanumeric (string->char-set "_.-")))) (*matcher - (complete - (seq (char-set char-set:alphabetic) - (* (char-set cs))))))) + (seq (char-set char-set:alphabetic) + (* (char-set cs)))))) ;;;; Elements @@ -840,7 +838,7 @@ USA. (call-with-output-string (lambda (port) (let normalize-string ((string string)) - (let ((b (string->parser-buffer (normalize-line-endings string)))) + (let ((b (utf8-string->parser-buffer (normalize-line-endings string)))) (let loop () (let* ((p (get-parser-buffer-pointer b)) (char (read-parser-buffer-char b))) @@ -1000,8 +998,7 @@ USA. (let ((v (expand-entity-value name p (lambda () - ((*parser (complete parse-content)) - (string->parser-buffer string)))))) + (*parse-utf8-string parse-content string))))) (if (not v) (perror p "Malformed entity reference" string)) v)) @@ -1339,7 +1336,7 @@ USA. (string? (vector-ref v 0))) (let ((v* (fluid-let ((*external-expansion?* #t)) - (parser (string->parser-buffer (vector-ref v 0)))))) + (*parse-utf8-string parser (vector-ref v 0))))) (if (not v*) (perror ptr (string-append "Malformed " description) diff --git a/v7/src/xml/xml.pkg b/v7/src/xml/xml.pkg index d1e55e8c7..bc194f5a0 100644 --- a/v7/src/xml/xml.pkg +++ b/v7/src/xml/xml.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml.pkg,v 1.90 2007/01/05 21:19:29 cph Exp $ +$Id: xml.pkg,v 1.91 2007/01/17 03:43:00 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -284,6 +284,7 @@ USA. read-xml read-xml-file string->xml + utf8-string->xml xml-processing-instructions-handlers) (export (runtime xml) alphabet:name-initial -- 2.25.1