From: Chris Hanson Date: Tue, 3 Dec 2019 07:50:29 +0000 (-0800) Subject: SRFI 115 complete. X-Git-Tag: mit-scheme-pucked-10.1.20~10^2~13 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=eb0d7caa3f7f27d0d3c8f9ea4b5118c6f6179fef;p=mit-scheme.git SRFI 115 complete. --- diff --git a/src/runtime/library-standard.scm b/src/runtime/library-standard.scm index 01021bdb8..c43a1f499 100644 --- a/src/runtime/library-standard.scm +++ b/src/runtime/library-standard.scm @@ -971,10 +971,33 @@ USA. make-hash-table string-ci-hash string-hash)) + +(define-standard-library '(srfi 115) + '(char-set->sre + regexp + regexp-extract + regexp-fold + regexp-match->list + regexp-match-count + regexp-match-submatch + regexp-match-submatch-end + regexp-match-submatch-start + regexp-match? + regexp-matches + regexp-matches? + regexp-partition + regexp-replace + regexp-replace-all + regexp-search + regexp-split + regexp? + rx + valid-sre?)) + (define-standard-library '(srfi 131) '(define-record-type)) - + (define-standard-library '(srfi 143) '(fixnum? fx* diff --git a/src/runtime/mit-macros.scm b/src/runtime/mit-macros.scm index 955d5eaa1..da5dd480d 100644 --- a/src/runtime/mit-macros.scm +++ b/src/runtime/mit-macros.scm @@ -284,6 +284,16 @@ USA. (scons-and conjunct (apply scons-begin body-exprs))) (else conjunct)))))))) + +;;; SRFI 115: rx + +(define $rx + (spar-transformer->runtime + (delay + (scons-rule `((* any)) + (lambda (sres) + (scons-call 'regexp + (apply scons-call 'quasiquote (scons-close ':) sres))))))) ;;;; Conditionals @@ -801,6 +811,7 @@ USA. (define-feature 'srfi-39 always) ;Parameter objects (define-feature 'srfi-62 always) ;S-expression comments (define-feature 'srfi-69 always) ;Basic Hash Tables +(define-feature 'srfi-115 always) ;Scheme Regular Expressions (define-feature 'srfi-131 always) ;ERR5RS Record Syntax (reduced) (define-feature 'srfi-133 always) ;Vector Library (R7RS-compatible) (define-feature 'srfi-143 always) ;Fixnums diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index ea598dae2..ff225b6eb 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -5083,7 +5083,7 @@ USA. (parent (runtime)) (export () (and $and) ;R7RS - (and-let* $and-let*) + (and-let* $and-let*) ;SRFI 2 (assert $assert) (begin0 $begin0) (bundle $bundle) @@ -5114,8 +5114,9 @@ USA. (letrec* $letrec*) ;R7RS (local-declare $local-declare) (parameterize $parameterize) ;R7RS + (rx $rx) ;SRFI 115 (quasiquote $quasiquote) ;R7RS - (receive $receive) + (receive $receive) ;SRFI 8 (unless $unless) ;R7RS (when $when) ;R7RS features ;R7RS @@ -5606,6 +5607,7 @@ USA. (files "srfi-115") (parent (runtime regexp)) (export () + char-set->sre condition-type:compile-regexp ;extension print-regexp ;extension regexp diff --git a/src/runtime/srfi-115.scm b/src/runtime/srfi-115.scm index 0757498dc..093352a09 100644 --- a/src/runtime/srfi-115.scm +++ b/src/runtime/srfi-115.scm @@ -44,7 +44,8 @@ USA. (parameterize ((input-pattern sre) (submatch-next (make-index-generator 1)) (submatch-keys (make-submatch-keys))) - (make-regexp (compile-matcher + (make-regexp sre + (compile-matcher (lambda () (compile-sre initial-ctx sre))) (submatch-keys->list (submatch-keys))))) @@ -56,8 +57,9 @@ USA. ((submatch-next))) (define-record-type - (make-regexp impl submatch-keys) + (make-regexp sre impl submatch-keys) regexp? + (sre regexp->sre) (impl regexp-impl) (submatch-keys regexp-submatch-keys)) @@ -487,7 +489,7 @@ USA. (define (no-capture-ctx no-capture? ctx) (make-ctx (ctx-fold? ctx) (ctx-ascii? ctx) no-capture?)) - + (define (any-char? object) (unicode-char? object)) @@ -497,6 +499,15 @@ USA. (define (max-arity? object) (exact-nonnegative-integer? object)) +(define (gcb? index string start end) + (string-gcb-fold (lambda (break prev-break break?) + (declare (ignore prev-break)) + (if (fix:> break index) + break? + (fix:= break index))) + #f + string start end)) + (define (submatch key insn) (hash-table-set! (submatch-keys) key #t) (insn:group key insn)) @@ -511,6 +522,12 @@ USA. (receive (numbered named) (partition exact-nonnegative-integer? (hash-table-keys table)) (append (sort numbered <) named))) + +(define char-set:word) +(defer-boot-action 'ucd + (lambda () + (set! char-set:word (char-set-adjoin char-set:alphabetic #\_)) + unspecific)) ;;;; @@ -574,12 +591,78 @@ USA. insn (submatch key insn))))) (define-sre-alias 'submatch-named '->) + +(define-sre-rule 'bos + (lambda (ctx) + (declare (ignore ctx)) + (insn:string-start))) + +(define-sre-rule 'eos + (lambda (ctx) + (declare (ignore ctx)) + (insn:string-end))) -(define-sre-rule 'bos (lambda (ctx) (declare (ignore ctx)) (insn:string-start))) -(define-sre-rule 'eos (lambda (ctx) (declare (ignore ctx)) (insn:string-end))) -(define-sre-rule 'bol (lambda (ctx) (declare (ignore ctx)) (insn:line-start))) -(define-sre-rule 'eol (lambda (ctx) (declare (ignore ctx)) (insn:line-end))) +(define-sre-rule 'bol + (lambda (ctx) + (declare (ignore ctx)) + (insn:start-boundary char-set:newline))) +(define-sre-rule 'eol + (lambda (ctx) + (declare (ignore ctx)) + (insn:end-boundary char-set:newline))) + +(define-sre-rule 'bow + (lambda (ctx) + (declare (ignore ctx)) + (insn:start-boundary char-set:word))) + +(define-sre-rule 'eow + (lambda (ctx) + (declare (ignore ctx)) + (insn:end-boundary char-set:word))) + +(define-sre-rule 'nwb + (lambda (ctx) + (declare (ignore ctx)) + (insn:non-boundary char-set:word))) + +(define-sre-rewriter `(word . ,valid-sre?) + (lambda (ctx . sres) + (declare (ignore ctx)) + `(: bow ,@sres eow))) + +(define-sre-rewriter `(word+ . ,valid-cset-sre?) + (lambda (ctx . cset-sres) + (declare (ignore ctx)) + `(: bow (+ (and ,char-set:word (or ,@cset-sres))) eow))) + +(define-sre-rewriter 'word + (lambda (ctx) + (declare (ignore ctx)) + `(: bow (+ ,char-set:word) eow))) + +(define-sre-rule 'bog + (lambda (ctx) + (declare (ignore ctx)) + (insn:string-zero-width + (lambda (index string start end) + (and (fix:< index end) + (gcb? index string start end)))))) + +(define-sre-rule 'eog + (lambda (ctx) + (declare (ignore ctx)) + (insn:string-zero-width + (lambda (index string start end) + (and (fix:> index start) + (gcb? index string start end)))))) + +(define-sre-rewriter 'grapheme + (lambda (ctx) + (declare (ignore ctx)) + `(: bog (* any) any eog))) + (define-sre-rule `(?? . ,valid-sre?) (lambda (ctx . sres) (insn:?? (compile-sres ctx sres)))) (define-sre-alias 'non-greedy-optional '??) @@ -641,6 +724,31 @@ USA. ranges)) ranges))))) +(define (char-set->sre char-set) + (receive (matched char-set*) (pull-out-names char-set) + (if char-set* + (let ((ranges + (cons '/ + (char-set-range-fold-right + (lambda (start end tail) + (let ((last (fix:- end 1))) + (if (fix:= last start) + (cons (integer->char start) tail) + (let ((s + (string (integer->char start) + (integer->char last)))) + (if (and (pair? tail) + (string? (car tail))) + (cons (string-append s (car tail)) + (cdr tail)) + (cons s tail)))))) + '() + char-set*)))) + (if (pair? matched) + `(or ,@matched ,ranges) + ranges)) + matched))) + (define-cset-sre-rule "char" (lambda (ctx char) (maybe-xform ctx (char-set char))) (lambda (ctx object) (declare (ignore ctx)) (unicode-char? object))) @@ -688,30 +796,54 @@ USA. (if (ctx-ascii? ctx) char-set:ascii char-set:full) (compile-cset-sres ctx cset-sres)))) (define-cset-sre-alias 'complement '~) + +(define char-set-names + '((any) + (nonl) + (ascii) + (lower-case lower) + (upper-case upper) + (title-case title) + (alphabetic alpha) + (numeric num) + (alphanumeric alphanum alnum) + (punctuation punct) + (symbol) + (graphic graph) + (whitespace white space) + (printing print) + (control cntrl) + (hex-digit xdigit))) (for-each (lambda (names) (let ((operation (lambda (ctx) - (maybe-xform ctx (char-set (car names)))))) + (maybe-xform ctx (name->char-set (car names)))))) (for-each (lambda (name) (define-cset-sre-rule name operation)) names))) - '((any) - (nonl) - (ascii) - (lower-case lower) - (upper-case upper) - (title-case title) - (alphabetic alpha) - (numeric num) - (alphanumeric alphanum alnum) - (punctuation punct) - (symbol) - (graphic graph) - (whitespace white space) - (printing print) - (control cntrl) - (hex-digit xdigit))) + char-set-names) + +(define (pull-out-names char-set) + (let ((name (char-set->name char-set))) + (if (and name + (any (lambda (names) + (memq name names)) + char-set-names)) + (values name #f) + (let loop + ((names '(alphanumeric alphabetic lower-case upper-case numeric + punctuation symbol whitespace control)) + (matched '()) + (char-set char-set)) + (if (pair? names) + (let ((char-set* (name->char-set (car names)))) + (if (char-set<= char-set* char-set) + (loop (cdr names) + (cons (car names) matched) + (char-set-difference char-set char-set*)) + (loop (cdr names) matched char-set))) + (values matched char-set)))))) (let ((proc (lambda (keyword proc value)