From 6cbd149325dc147aaed1bfe88fcffc966387dc84 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sat, 18 Feb 2006 04:31:55 +0000 Subject: [PATCH] Add basic support for RDF triples and RDF/NT. --- v7/src/xml/compile.scm | 8 +- v7/src/xml/ed-ffi.scm | 8 +- v7/src/xml/load.scm | 6 +- v7/src/xml/rdf-nt.scm | 248 ++++++++++++++++++++++++++++++++++++++ v7/src/xml/rdf-struct.scm | 92 ++++++++++++++ v7/src/xml/xml.pkg | 45 ++++++- 6 files changed, 396 insertions(+), 11 deletions(-) create mode 100644 v7/src/xml/rdf-nt.scm create mode 100644 v7/src/xml/rdf-struct.scm diff --git a/v7/src/xml/compile.scm b/v7/src/xml/compile.scm index 14cb7a243..35527bf53 100644 --- a/v7/src/xml/compile.scm +++ b/v7/src/xml/compile.scm @@ -1,8 +1,8 @@ #| -*-Scheme-*- -$Id: compile.scm,v 1.16 2005/02/19 04:34:17 cph Exp $ +$Id: compile.scm,v 1.17 2006/02/18 04:31:34 cph Exp $ -Copyright 2001,2003,2004,2005 Massachusetts Institute of Technology +Copyright 2001,2003,2004,2005,2006 Massachusetts Institute of Technology This file is part of MIT/GNU Scheme. @@ -45,5 +45,7 @@ USA. "xml-parser" "xml-rpc" "xhtml" - "xhtml-entities")))) + "xhtml-entities" + "rdf-struct" + "rdf-nt")))) (cref/generate-constructors "xml" 'ALL))) \ No newline at end of file diff --git a/v7/src/xml/ed-ffi.scm b/v7/src/xml/ed-ffi.scm index 89c039c55..8d0f527c6 100644 --- a/v7/src/xml/ed-ffi.scm +++ b/v7/src/xml/ed-ffi.scm @@ -1,8 +1,8 @@ #| -*-Scheme-*- -$Id: ed-ffi.scm,v 1.8 2005/12/19 03:58:56 cph Exp $ +$Id: ed-ffi.scm,v 1.9 2006/02/18 04:31:38 cph Exp $ -Copyright 2001,2005 Massachusetts Institute of Technology +Copyright 2001,2005,2006 Massachusetts Institute of Technology This file is part of MIT/GNU Scheme. @@ -26,7 +26,9 @@ USA. ;;;; XML: Edwin buffer packaging info (standard-scheme-find-file-initialization - '#(("xhtml" (runtime xml html)) + '#(("rdf-nt" (runtime rdf nt)) + ("rdf-struct" (runtime rdf structures)) + ("xhtml" (runtime xml html)) ("xhtml-entities" (runtime xml html)) ("xml-chars" (runtime xml parser)) ("xml-names" (runtime xml names)) diff --git a/v7/src/xml/load.scm b/v7/src/xml/load.scm index 0e2c9304b..6e0364abb 100644 --- a/v7/src/xml/load.scm +++ b/v7/src/xml/load.scm @@ -1,8 +1,8 @@ #| -*-Scheme-*- -$Id: load.scm,v 1.15 2004/12/13 03:22:21 cph Exp $ +$Id: load.scm,v 1.16 2006/02/18 04:31:42 cph Exp $ -Copyright 2001,2002,2003,2004 Massachusetts Institute of Technology +Copyright 2001,2002,2003,2004,2006 Massachusetts Institute of Technology This file is part of MIT/GNU Scheme. @@ -28,4 +28,4 @@ USA. (with-working-directory-pathname (directory-pathname (current-load-pathname)) (lambda () (load-package-set "xml"))) -(add-subsystem-identification! "XML" '(0 7)) \ No newline at end of file +(add-subsystem-identification! "XML" '(0 8)) \ No newline at end of file diff --git a/v7/src/xml/rdf-nt.scm b/v7/src/xml/rdf-nt.scm new file mode 100644 index 000000000..f859abb68 --- /dev/null +++ b/v7/src/xml/rdf-nt.scm @@ -0,0 +1,248 @@ +#| -*-Scheme-*- + +$Id: rdf-nt.scm,v 1.1 2006/02/18 04:31:47 cph Exp $ + +Copyright 2006 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +|# + +;;;; Codec for RDF N-triples syntax + +(declare (usual-integrations)) + +;;;; Decoder + +(define (read-rdf/nt port) + (let loop () + (let ((line (read-line port))) + (if (eof-object? line) + line + (let ((v + (or (parse-one-line (string->parser-buffer line)) + (error "Failed to parse RDF/NT line:" line)))) + (if (fix:= (vector-length v) 0) + (loop) + (vector-ref v 0))))))) + +(define parse-one-line + (*parser + (complete + (seq (noise match-ws*) + (? (alt parse-triple parse-comment)))))) + +(define parse-triple + (*parser + (encapsulate (lambda (v) + (make-rdf-triple (vector-ref v 0) + (vector-ref v 1) + (vector-ref v 2))) + (seq (alt parse-uri-ref parse-node-id) + (noise match-ws+) + parse-uri-ref + (noise match-ws+) + (alt parse-uri-ref parse-node-id parse-literal) + (noise match-ws*) + "." + (noise match-ws*))))) + +(define parse-comment + (*parser (noise (seq "#" (* (char-set char-set:character)))))) + +(define parse-uri-ref + (*parser (seq #\< parse-absolute-uri #\>))) + +(define parse-node-id + (*parser + (encapsulate (lambda (v) (make-rdf-bnode (vector-ref v 0))) + (seq "_:" + (match match-bnode-name))))) + +(define match-bnode-name + (*matcher + (seq (char-set char-set:name-head) + (* (char-set char-set:name-tail))))) + +(define parse-literal + (*parser + (encapsulate (lambda (v) + (make-rdf-literal (vector-ref v 0) (vector-ref v 1))) + (seq #\" + parse-string + #\" + (alt (seq #\@ (match match-language)) + (seq "^^" parse-uri-ref) + (values #f)))))) + +(define (parse-string b) + (let ((port (open-output-string))) + + (define (loop) + (let ((p (get-parser-buffer-pointer b))) + (if (match-parser-buffer-char-in-set b char-set:unescaped) + (begin + (let loop () + (if (match-parser-buffer-char-in-set b char-set:unescaped) + (loop))) + (call-with-parser-buffer-tail b p + (lambda (string start end) + (write-substring string start end port)))))) + (let ((char + (let ((p (get-parser-buffer-pointer b))) + (and (match-parser-buffer-char b #\\) + (cond ((match-parser-buffer-char b #\t) #\tab) + ((match-parser-buffer-char b #\n) #\newline) + ((match-parser-buffer-char b #\r) #\return) + ((match-parser-buffer-char b #\") #\") + ((match-parser-buffer-char b #\\) #\\) + ((or (and (match-parser-buffer-char b #\u) + (match-hex 4)) + (and (match-parser-buffer-char b #\U) + (match-hex 8))) + (integer->char + (call-with-parser-buffer-tail b p + (lambda (string start end) + (substring->number string (+ start 2) end + 16 #t))))) + (else #f)))))) + (if char + (begin + (write-char char port) + (loop)) + (vector (get-output-string port))))) + + (define (match-hex n) + (let loop ((i 0)) + (if (fix:< i n) + (and (match-parser-buffer-char-in-set b char-set:hex) + (loop (fix:+ i 1))) + #t))) + + (port/set-coding port 'UTF-8) + (loop))) + +(define match-language + (*matcher + (seq (+ (char-set char-set:language-head)) + (* (seq #\- (+ (char-set char-set:language-tail))))))) + +(define match-ws* + (*matcher (* (char-set char-set:ws)))) + +(define match-ws+ + (*matcher (+ (char-set char-set:ws)))) + +(define char-set:ws + (char-set #\space #\tab)) + +(define char-set:character + (ascii-range->char-set #x20 #x7F)) + +(define char-set:hex + (char-set-union (ascii-range->char-set #x30 #x3A) + (ascii-range->char-set #x41 #x47))) + +(define char-set:unescaped + (char-set-difference char-set:character (char-set #\" #\\))) + +(define char-set:name-head + (char-set-union (ascii-range->char-set #x41 #x5B) + (ascii-range->char-set #x61 #x7B))) + +(define char-set:name-tail + (char-set-union char-set:name-head + (ascii-range->char-set #x30 #x3A))) + +(define char-set:language-head + (ascii-range->char-set #x61 #x7B)) + +(define char-set:language-tail + (char-set-union char-set:language-head + (ascii-range->char-set #x30 #x3A))) + +;;;; Encoder + +(define (write-rdf/nt triple port) + (let ((s (rdf-triple-subject triple))) + (cond ((uri? s) (write-uri-ref s port)) + ((rdf-bnode? s) (write-bnode s port)))) + (write-char #\space port) + (write-uri-ref (rdf-triple-predicate triple) port) + (write-char #\space port) + (let ((o (rdf-triple-object triple))) + (cond ((uri? o) (write-uri-ref o port)) + ((rdf-bnode? o) (write-bnode o port)) + ((rdf-literal? o) (write-literal o port)))) + (write-char #\space port) + (write-char #\. port) + (newline port)) + +(define (write-uri-ref uri port) + (write-char #\< port) + (write-uri uri port) + (write-char #\> port)) + +(define (write-bnode bnode port) + (write-string "_:" port) + (write-string (rdf-bnode-name bnode) port)) + +(define (write-literal literal port) + (write-char #\" port) + (write-literal-text (rdf-literal-text literal) port) + (write-char #\" port) + (cond ((rdf-literal-type literal) + => (lambda (uri) + (write-string "^^" port) + (write-uri-ref uri port))) + ((rdf-literal-language literal) + => (lambda (lang) + (write-char #\@ port) + (write-string lang port))))) + +(define (write-literal-text text port) + (let ((text (open-input-string text))) + (port/set-coding text 'UTF-8) + (let loop () + (let ((char (read-char text))) + (if (not (eof-object? char)) + (begin + (write-literal-char char port) + (loop))))))) + +(define (write-literal-char char port) + (if (char-set-member? char-set:unescaped char) + (write-char char port) + (begin + (write-char #\\ port) + (if (or (char=? char #\") + (char=? char #\\)) + (write-char char port) + (let ((n (char->integer char))) + (cond ((fix:= n #x9) (write-char #\t port)) + ((fix:= n #xA) (write-char #\n port)) + ((fix:= n #xD) (write-char #\r port)) + ((fix:< n #x10000) (write-hex n 4 port)) + (else (write-hex n 8 port)))))))) + +(define (write-hex n digits port) + (let loop ((n n) (m (expt 16 digits))) + (if (> m 1) + (begin + (write-char (string-ref "0123456789ABCDEF" (quotient n m)) port) + (loop (remainder n m) (quotient m 16)))))) \ No newline at end of file diff --git a/v7/src/xml/rdf-struct.scm b/v7/src/xml/rdf-struct.scm new file mode 100644 index 000000000..60248bd03 --- /dev/null +++ b/v7/src/xml/rdf-struct.scm @@ -0,0 +1,92 @@ +#| -*-Scheme-*- + +$Id: rdf-struct.scm,v 1.1 2006/02/18 04:31:51 cph Exp $ + +Copyright 2006 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +|# + +;;;; RDF data structures + +(declare (usual-integrations)) + +(define-record-type + (%make-rdf-triple subject predicate object) + rdf-triple? + (subject rdf-triple-subject) + (predicate rdf-triple-predicate) + (object rdf-triple-object)) + +(define-guarantee rdf-triple "RDF triple") + +(define (make-rdf-triple subject predicate object) + (%make-rdf-triple (if (rdf-bnode? subject) + subject + (->absolute-uri subject 'MAKE-RDF-TRIPLE)) + (->absolute-uri predicate 'MAKE-RDF-TRIPLE) + (if (or (rdf-bnode? object) + (rdf-literal? object)) + object + (->absolute-uri object 'MAKE-RDF-TRIPLE)))) + +(define-record-type + (%make-rdf-bnode name) + rdf-bnode? + (name rdf-bnode-name)) + +(define-guarantee rdf-bnode "RDF bnode") + +(define (make-rdf-bnode name) + (if (not (and (string? name) + (complete-match match-bnode-name name))) + (error:wrong-type-argument name "RDF bnode name" 'RDF-BNODE)) + (%make-rdf-bnode name)) + +(define-record-type + (%make-rdf-literal text type) + rdf-literal? + (text rdf-literal-text) + (type %rdf-literal-type)) + +(define-guarantee rdf-literal "RDF literal") + +(define (make-rdf-literal text type) + (guarantee-utf8-string text 'RDF-LITERAL) + (%make-rdf-literal text + (if (or (not type) + (and (string? type) + (complete-match match-language type))) + type + (->absolute-uri type 'RDF-LITERAL)))) + +(define (rdf-literal-type literal) + (let ((type (%rdf-literal-type literal))) + (and (absolute-uri? type) + type))) + +(define (rdf-literal-language literal) + (let ((type (%rdf-literal-type literal))) + (and (not (absolute-uri? type)) + type))) + +(define (complete-match matcher string #!optional start end) + (let ((buffer (string->parser-buffer string start end))) + (and (matcher buffer) + (not (peek-parser-buffer-char buffer))))) \ No newline at end of file diff --git a/v7/src/xml/xml.pkg b/v7/src/xml/xml.pkg index aabc20d73..a9753e203 100644 --- a/v7/src/xml/xml.pkg +++ b/v7/src/xml/xml.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml.pkg,v 1.63 2006/01/30 21:05:30 cph Exp $ +$Id: xml.pkg,v 1.64 2006/02/18 04:31:55 cph Exp $ Copyright 2001,2002,2003,2004,2005,2006 Massachusetts Institute of Technology @@ -515,4 +515,45 @@ USA. xml-rpc:parse-response xml-rpc:request xml-rpc:response - xml-rpc:simple-fault)) \ No newline at end of file + xml-rpc:simple-fault)) + +(define-package (runtime rdf) + (files) + (parent (runtime))) + +(define-package (runtime rdf structures) + (files "rdf-struct") + (parent (runtime rdf)) + (export () + + + + error:not-rdf-bnode + error:not-rdf-literal + error:not-rdf-triple + guarantee-rdf-bnode + guarantee-rdf-literal + guarantee-rdf-triple + make-rdf-bnode + make-rdf-literal + make-rdf-triple + rdf-bnode-name + rdf-bnode? + rdf-literal-language + rdf-literal-text + rdf-literal-type + rdf-literal? + rdf-triple-object + rdf-triple-predicate + rdf-triple-subject + rdf-triple?) + (export (runtime rdf nt) + match-bnode-name + match-language)) + +(define-package (runtime rdf nt) + (files "rdf-nt") + (parent (runtime rdf)) + (export () + read-rdf/nt + write-rdf/nt)) \ No newline at end of file -- 2.25.1