From 569c03cc81537fa1147bf1192c03adb6aa73b962 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Wed, 25 May 2005 03:16:12 +0000 Subject: [PATCH] Implement regular expressions for URIs. --- v7/src/runtime/runtime.pkg | 32 +++++++++- v7/src/runtime/url.scm | 127 ++++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 3 deletions(-) diff --git a/v7/src/runtime/runtime.pkg b/v7/src/runtime/runtime.pkg index a11702135..2a0627834 100644 --- a/v7/src/runtime/runtime.pkg +++ b/v7/src/runtime/runtime.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: runtime.pkg,v 14.544 2005/05/24 04:50:28 cph Exp $ +$Id: runtime.pkg,v 14.545 2005/05/25 03:16:03 cph Exp $ Copyright 1988,1989,1990,1991,1992,1993 Massachusetts Institute of Technology Copyright 1994,1995,1996,1997,1998,1999 Massachusetts Institute of Technology @@ -4818,6 +4818,36 @@ USA. uri-query uri-registry-name? uri-relative? + uri-rexp:abs-path + uri-rexp:absolute-uri + uri-rexp:authority + uri-rexp:domainlabel + uri-rexp:escaped + uri-rexp:fragment + uri-rexp:heir-part + uri-rexp:host + uri-rexp:hostname + uri-rexp:hostport + uri-rexp:ipv4-address + uri-rexp:net-path + uri-rexp:opaque-part + uri-rexp:param + uri-rexp:path-segments + uri-rexp:pchar + uri-rexp:port + uri-rexp:query + uri-rexp:reg-name + uri-rexp:rel-path + uri-rexp:rel-segment + uri-rexp:relative-uri + uri-rexp:scheme + uri-rexp:segment + uri-rexp:server + uri-rexp:toplabel + uri-rexp:uri-reference + uri-rexp:uric + uri-rexp:uric-no-slash + uri-rexp:userinfo uri-scheme uri-scheme? uri-server-host diff --git a/v7/src/runtime/url.scm b/v7/src/runtime/url.scm index 056399f09..de09e0f94 100644 --- a/v7/src/runtime/url.scm +++ b/v7/src/runtime/url.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: url.scm,v 1.20 2005/05/25 03:15:27 cph Exp $ +$Id: url.scm,v 1.21 2005/05/25 03:16:12 cph Exp $ Copyright 2000,2001,2003,2004,2005 Massachusetts Institute of Technology @@ -514,4 +514,127 @@ USA. (define (url:encode-string string) (call-with-output-string (lambda (port) - (write-escaped string url:char-set:unescaped port)))) \ No newline at end of file + (write-escaped string url:char-set:unescaped port)))) + +;;;; Regular expressions + +(define (uri-rexp:uri-reference) + (rexp-sequence (rexp-alternatives (uri-rexp:absolute-uri) + (uri-rexp:relative-uri)) + (rexp-optional "#" (uri-rexp:fragment)))) + +(define (uri-rexp:absolute-uri) + (rexp-sequence (uri-rexp:scheme) + ":" + (rexp-alternatives (uri-rexp:heir-part) + (uri-rexp:opaque-part)))) + +(define (uri-rexp:relative-uri) + (rexp-sequence (rexp-alternatives (uri-rexp:net-path) + (uri-rexp:abs-path) + (uri-rexp:rel-path)) + (rexp-optional "?" (uri-rexp:query)))) + +(define (uri-rexp:heir-part) + (rexp-sequence (rexp-alternatives (uri-rexp:net-path) + (uri-rexp:abs-path)) + (rexp-optional "?" (uri-rexp:query)))) + +(define (uri-rexp:opaque-part) + (rexp-sequence (uri-rexp:uric-no-slash) + (rexp* (uri-rexp:uric)))) + +(define (uri-rexp:uric-no-slash) + (uri-rexp:escaped char-set:uric-no-slash)) + +(define (uri-rexp:net-path) + (rexp-sequence "//" + (uri-rexp:authority) + (rexp-optional (uri-rexp:abs-path)))) + +(define (uri-rexp:abs-path) + (rexp-sequence "/" (uri-rexp:path-segments))) + +(define (uri-rexp:rel-path) + (rexp-sequence (uri-rexp:rel-segment) + (rexp-optional (uri-rexp:abs-path)))) + +(define (uri-rexp:rel-segment) + (rexp+ (uri-rexp:escaped char-set:uri-rel-segment))) + +(define (uri-rexp:scheme) + (rexp-sequence char-set:uri-alpha + (rexp* char-set:uri-scheme))) + +(define (uri-rexp:authority) + (rexp-alternatives (uri-rexp:server) + (uri-rexp:reg-name))) + +(define (uri-rexp:reg-name) + (rexp+ (uri-rexp:escaped char-set:uri-reg-name))) + +(define (uri-rexp:server) + (rexp-sequence (rexp-optional (uri-rexp:userinfo) "@") + (uri-rexp:hostport))) + +(define (uri-rexp:userinfo) + (rexp* (uri-rexp:escaped char-set:uri-userinfo))) + +(define (uri-rexp:hostport) + (rexp-sequence (uri-rexp:host) + (rexp-optional ":" (uri-rexp:port)))) + +(define (uri-rexp:host) + (rexp-alternatives (uri-rexp:hostname) + (uri-rexp:ipv4-address))) + +(define (uri-rexp:hostname) + (rexp-sequence (rexp* (uri-rexp:domainlabel) ".") + (uri-rexp:toplabel) + (rexp-optional "."))) + +(define (uri-rexp:domainlabel) + (rexp-sequence char-set:uri-alphanum + (rexp-optional (rexp* char-set:uri-alphanum-) + char-set:uri-alphanum))) + +(define (uri-rexp:toplabel) + (rexp-sequence char-set:uri-alpha + (rexp-optional (rexp* char-set:uri-alphanum-) + char-set:uri-alphanum))) + +(define (uri-rexp:ipv4-address) + (let ((digits (rexp+ char-set:uri-digit))) + (rexp-sequence digits "." digits "." digits "." digits))) + +(define (uri-rexp:port) + (rexp* char-set:uri-digit)) + +(define (uri-rexp:path-segments) + (rexp-sequence (uri-rexp:segment) + (rexp* "/" (uri-rexp:segment)))) + +(define (uri-rexp:segment) + (rexp-sequence (rexp* (uri-rexp:pchar)) + (rexp* ";" (uri-rexp:param)))) + +(define (uri-rexp:param) + (rexp* (uri-rexp:pchar))) + +(define (uri-rexp:pchar) + (uri-rexp:escaped char-set:uri-pchar)) + +(define (uri-rexp:query) + (rexp* (uri-rexp:uric))) + +(define (uri-rexp:fragment) + (rexp* (uri-rexp:uric))) + +(define (uri-rexp:uric) + (uri-rexp:escaped char-set:uric)) + +(define (uri-rexp:escaped cs) + (rexp-alternatives cs + (rexp-sequence "%" + char-set:uri-hex + char-set:uri-hex))) \ No newline at end of file -- 2.25.1