From: Chris Hanson Date: Wed, 27 Nov 2019 08:09:29 +0000 (-0800) Subject: Partial implementation of SRFI 115. X-Git-Tag: mit-scheme-pucked-10.1.20~10^2~44 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=0b4a952a6f68eb7fdcb47a247e31bf64d20998f0;p=mit-scheme.git Partial implementation of SRFI 115. --- diff --git a/src/runtime/ed-ffi.scm b/src/runtime/ed-ffi.scm index a97538e89..92f19c0e3 100644 --- a/src/runtime/ed-ffi.scm +++ b/src/runtime/ed-ffi.scm @@ -154,6 +154,7 @@ USA. ("sfile" (runtime simple-file-ops)) ("socket" (runtime socket)) ("srfi-1" (runtime srfi-1)) + ("srfi-115" (runtime regexp srfi-115)) ("stack-sample" (runtime stack-sampler)) ("stream" (runtime stream)) ("string" (runtime string)) diff --git a/src/runtime/make.scm b/src/runtime/make.scm index 6b08ca6a6..273c1c533 100644 --- a/src/runtime/make.scm +++ b/src/runtime/make.scm @@ -495,6 +495,7 @@ USA. ((runtime record) initialize-conditions!) ((runtime stream) initialize-conditions!) ((runtime regexp regsexp) initialize-conditions!) + ((runtime regexp srfi-115) initialize-conditions!) ;; System dependent stuff (runtime os-primitives) ;; Floating-point environment -- needed by threads. diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 72abc6f27..f3336c19d 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -5514,6 +5514,26 @@ USA. regsexp-search-string-forward regsexp?)) +(define-package (runtime regexp srfi-115) + (files "srfi-115") + (parent (runtime regexp)) + (export () + condition-type:compile-regexp + cset-sre? + regexp + regexp-match->list + regexp-match-count + regexp-match-keys + regexp-match-submatch + regexp-match-submatch-end + regexp-match-submatch-start + regexp-match? + regexp-matches + regexp-matches? + regexp-search + regexp? + sre?)) + (define-package (runtime regexp rules) (files "regexp-rules") (parent (runtime)) @@ -5537,7 +5557,9 @@ USA. (parent (runtime)) (export (runtime regexp) all-groups + group-end group-key + group-start group-value insn:* insn:** @@ -5558,6 +5580,7 @@ USA. insn:string insn:string-end insn:string-start + make-group make-groups make-source-position make-string-position diff --git a/src/runtime/srfi-115.scm b/src/runtime/srfi-115.scm new file mode 100644 index 000000000..2cc8a4538 --- /dev/null +++ b/src/runtime/srfi-115.scm @@ -0,0 +1,419 @@ +#| -*-Scheme-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, + 2017, 2018, 2019 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +|# + +;;;; SRFI 115: Scheme Regular Expressions +;;; package: (runtime regexp srfi-115) + +(declare (usual-integrations)) + +(define (sre? object) + (and (or (find-cset-sre-rule object) + (find-sre-rule object)) + #t)) +(register-predicate! sre? 'source-regexp) + +(define (cset-sre? object) + (and (find-cset-sre-rule object) + #t)) +(register-predicate! cset-sre? 'char-set-regexp) + +(define (compile-sre-top-level sre) + (%link-insn + (parameterize ((%input-pattern sre) + (%submatch-next 1)) + (compile-sre sre)))) + +(define %input-pattern (make-unsettable-parameter #f)) +(define %submatch-next (make-settable-parameter #f)) + +(define (next-submatch-number) + (let ((n (%submatch-next))) + (%submatch-next (+ n 1)) + n)) + +(define (%link-insn insn) + (make-regexp + (insn + (lambda (position groups fail) + (declare (ignore fail)) + (cons position (all-groups groups)))))) + +(define-record-type + (make-regexp impl) + regexp? + (impl regexp-impl)) + +(define condition-type:compile-regexp) +(define compile-error) +(define (initialize-conditions!) + (set! condition-type:compile-regexp + (make-condition-type 'compile-sre condition-type:error + '(pattern element) + (lambda (condition port) + (write-string "Ill-formed regular s-expression: " port) + (write (access-condition condition 'element) port) + (write-string " from pattern: " port) + (write (access-condition condition 'pattern) port)))) + (set! compile-error + (condition-signaller condition-type:compile-regexp + '(pattern element) + standard-error-handler)) + unspecific) + +;;;; Procedures + +(define (regexp-matches re string #!optional start end) + (guarantee nfc-string? string 'regexp-matches) + (let* ((end (fix:end-index end (string-length string) 'regexp-matches)) + (start (fix:start-index start end 'regexp-matches))) + (%regexp-match (regexp re) (make-string-position string start end)))) + +(define (regexp-matches? re string #!optional start end) + (guarantee nfc-string? string 'regexp-matches?) + (let* ((end (fix:end-index end (string-length string) 'regexp-matches?)) + (start (fix:start-index start end 'regexp-matches?))) + (%regexp-match (regexp re) (make-string-position string start end)))) + +(define (regexp-search re string #!optional start end) + (guarantee nfc-string? string 'regexp-search) + (let* ((end (fix:end-index end (string-length string) 'regexp-search)) + (start (fix:start-index start end 'regexp-search))) + (let ((cre (regexp re))) + (let loop ((position (make-string-position string start end))) + (or (%regexp-match cre position) + (and (next-char position) + (loop (next-position position)))))))) + +(define (regexp re) + (if (regexp? re) + re + (compile-sre-top-level re))) + +(define (%regexp-match cre start-position) + (let ((result + ((regexp-impl cre) start-position (make-groups) (lambda () #f)))) + (and result + (make-regexp-match (make-group 0 start-position (car result)) + (cdr result))))) + +(define-record-type + (make-regexp-match group0 groups) + regexp-match? + (group0 %regexp-match-group0) + (groups %regexp-match-groups)) + +(define (regexp-match-count match) + (length (%regexp-match-groups match))) + +(define (%match-group match key caller) + (if (eqv? key 0) + (%regexp-match-group0 match) + (let ((group + (find (lambda (group) + (eqv? key (group-key group))) + (%regexp-match-groups match)))) + (if (not group) + (error:bad-range-argument key caller)) + group))) + +(define (regexp-match-submatch match key) + (group-value (%match-group match key 'regexp-match-submatch))) + +(define (regexp-match-submatch-start match key) + (group-start (%match-group match key 'regexp-match-submatch-start))) + +(define (regexp-match-submatch-end match key) + (group-end (%match-group match key 'regexp-match-submatch-end))) + +(define (regexp-match->list match) + (cons (group-value (%regexp-match-group0 match)) + (map group-value (%regexp-match-groups match)))) + +(define (regexp-match-keys match) + (cons (group-key (%regexp-match-group0 match)) + (map group-key (%regexp-match-groups match)))) + +;;;; Compiler rules + +(define sre-rules) +(define sre-rewrite-rules) +(define cset-sre-rules) +(define cset-sre-rewrite-rules) +(defer-boot-action 'regexp-rules + (lambda () + (set! sre-rules (make-rules 'sre)) + (set! sre-rewrite-rules (make-rules 'sre-rewrite)) + (set! cset-sre-rules (make-rules 'cset-sre)) + (set! cset-sre-rewrite-rules (make-rules 'cset-sre-rewrite)) + unspecific)) + +(define (rule-finder rules rewrite-rules) + (let ((matcher (rules-matcher rules)) + (rewriter (rules-rewriter rewrite-rules))) + (lambda (object) + (matcher (rewriter object))))) + +(define-deferred-procedure find-sre-rule 'regexp-rules + (rule-finder sre-rules sre-rewrite-rules)) + +(define-deferred-procedure find-cset-sre-rule 'regexp-rules + (rule-finder cset-sre-rules cset-sre-rewrite-rules)) + +(define (pattern-rule-definer rules) + (let ((adder (rules-adder rules))) + (lambda (pattern operation #!optional predicate) + (adder + (if (pattern? pattern) + (pattern-rule pattern operation predicate) + (general-rule pattern predicate operation)))))) + +(define-deferred-procedure define-sre-rule 'regexp-rules + (pattern-rule-definer sre-rules)) + +(define-deferred-procedure define-sre-rewriter 'regexp-rules + (pattern-rule-definer sre-rewrite-rules)) + +(define-deferred-procedure define-cset-sre-rule 'regexp-rules + (pattern-rule-definer cset-sre-rules)) + +(define-deferred-procedure define-cset-sre-rewriter 'regexp-rules + (pattern-rule-definer cset-sre-rewrite-rules)) + +(define (alias-rule-definer rules) + (let ((adder (rules-adder rules))) + (lambda (from to) + (guarantee interned-symbol? from) + (guarantee interned-symbol? to) + (adder + (pattern-rule `(,from . ,any-object?) + (lambda args (cons to args))))))) + +(define-deferred-procedure define-sre-alias 'regexp-rules + (alias-rule-definer sre-rewrite-rules)) + +(define-deferred-procedure define-cset-sre-alias 'regexp-rules + (alias-rule-definer cset-sre-rewrite-rules)) + +(define (compile-sre sre) + (cond ((find-cset-sre-rule sre) + => (lambda (rule) + (insn:char-set ((rule-operation rule) sre)))) + ((find-sre-rule sre) + => (lambda (rule) + ((rule-operation rule) sre))) + (else + (compile-error (%input-pattern) sre)))) + +(define (compile-sres sres) + (insn:seq (map-in-order compile-sre sres))) + +(define (compile-cset-sre cset-sre) + (cond ((find-cset-sre-rule cset-sre) + => (lambda (rule) + ((rule-operation rule) cset-sre))) + (else + (compile-error (%input-pattern) cset-sre)))) + +(define (compile-cset-sres cset-sres) + (map-in-order compile-cset-sre cset-sres)) + +(define (any-char? object) + (unicode-char? object)) + +(define (min-arity? object) + (exact-nonnegative-integer? object)) + +(define (max-arity? object) + (exact-nonnegative-integer? object)) + +(define (backref-key? object) + (or (exact-positive-integer? object) + (interned-symbol? object))) + +;;;; + +(define-sre-rule "char" + (lambda (char) (insn:char char #f)) + any-char?) + +(define-sre-rule "string" + (lambda (string) (insn:string string #f)) + string?) + +(define-sre-rule `(* . ,sre?) + (lambda sres (insn:* (compile-sres sres)))) +(define-sre-alias 'zero-or-more '*) + +(define-sre-rule `(+ . ,sre?) + (lambda sres (insn:** 1 #f (compile-sres sres)))) +(define-sre-alias 'one-or-more '+) + +(define-sre-rule `(? . ,sre?) + (lambda sres (insn:? (compile-sres sres)))) +(define-sre-alias 'optional '?) + +(define-sre-rule `(= ,min-arity? . ,sre?) + (lambda (n . sres) (insn:** n n (compile-sres sres)))) +(define-sre-alias 'exactly '=) + +(define-sre-rule `(>= ,min-arity? . ,sre?) + (lambda (n . sres) (insn:** n #f (compile-sres sres)))) +(define-sre-alias 'at-least '>=) + +(define-sre-rule `(** ,min-arity? ,max-arity? . ,sre?) + (lambda (n m . sres) (insn:** n m (compile-sres sres))) + (lambda (n m . sres) (declare (ignore sres)) (<= n m))) +(define-sre-alias 'repeated '**) + +(define-sre-rule `(|\|| . ,sre?) + (lambda sres (insn:alt (map-in-order compile-sre sres)))) +(define-sre-alias 'or '|\||) + +(define-sre-rule `(: . ,sre?) + (lambda sres (compile-sres sres))) +(define-sre-alias 'seq ':) + +(define-sre-rule `($ . ,sre?) + (lambda sres + (insn:group (next-submatch-number) + (compile-sres sres)))) +(define-sre-alias 'submatch '$) + +(define-sre-rule `(-> ,interned-symbol? . ,sre?) + (lambda (key . sres) + (insn:group key (compile-sres sres)))) +(define-sre-alias 'submatch-named '->) + +(define-sre-rule 'bos (lambda () (insn:string-start))) +(define-sre-rule 'eos (lambda () (insn:string-end))) +(define-sre-rule 'bol (lambda () (insn:line-start))) +(define-sre-rule 'eol (lambda () (insn:line-end))) + +(define-sre-rule `(?? . ,sre?) + (lambda sres (insn:?? (compile-sres sres)))) +(define-sre-alias 'non-greedy-optional '??) + +(define-sre-rule `(*? . ,sre?) + (lambda sres (insn:*? (compile-sres sres)))) +(define-sre-alias 'non-greedy-zero-or-more '*?) + +(define-sre-rule `(**? ,min-arity? ,max-arity? . ,sre?) + (lambda (n m . sres) (insn:**? n m (compile-sres sres))) + (lambda (n m . sres) (declare (ignore sres)) (<= n m))) +(define-sre-alias 'non-greedy-repeated '**?) + +(define-sre-rule `(backref ,backref-key?) + (lambda (key) (insn:group-ref key))) + +;;;; + +(define-cset-sre-rule "char" + (lambda (char) (char-set char)) + any-char?) + +(define-cset-sre-rule "string" + (lambda (string) (char-set string)) + (lambda (object) + (and (string? object) + (fix:= 1 (string-length object))))) + +(define-cset-sre-rule "char-set" + (lambda (cs) cs) + (lambda (object) (char-set? object))) + +(define-cset-sre-rule `(,string?) + (lambda (s) (char-set s))) + +(define-cset-sre-rule `(char-set ,string?) + (lambda (s) (char-set s))) + +(define (range-spec? object) + (or (unicode-char? object) + (and (string? object) + (let ((end (string-length object))) + (and (even? end) + (let loop ((i 0)) + (if (fix:< i end) + (and (char<=? (string-ref object i) + (string-ref object (fix:+ i 1))) + (loop (fix:+ i 2))) + #t))))))) + +(define (range-spec->ranges rs) + (if (unicode-char? rs) + (list rs) + (let ((end (string-length rs))) + (let loop ((i 0) (ranges '())) + (if (fix:< i end) + (loop (fix:+ i 2) + (cons (list (char->integer (string-ref rs i)) + (char->integer (string-ref rs (fix:+ i 1)))) + ranges)) + ranges))))) + +(define-cset-sre-rule `(/ . ,range-spec?) + (lambda rs (char-set* (append-map range-spec->ranges rs)))) +(define-cset-sre-alias 'char-range '/) + +(define-cset-sre-rule `(or . ,cset-sre?) + (lambda cset-sres (char-set-union* (compile-cset-sres cset-sres)))) +(define-cset-sre-alias '|\|| 'or) + +(define-cset-sre-rule `(and . ,cset-sre?) + (lambda cset-sres (char-set-intersection* (compile-cset-sres cset-sres)))) +(define-cset-sre-alias '& 'and) + +(define-cset-sre-rule `(- . ,cset-sre?) + (lambda cset-sres (apply char-set-difference (compile-cset-sres cset-sres)))) +(define-cset-sre-alias 'difference '-) + +(define-cset-sre-rule `(~ . ,cset-sre?) + (lambda cset-sres + (char-set-difference char-set:unicode + (char-set-union* (compile-cset-sres cset-sres))))) +(define-cset-sre-alias 'complement '~) + +(for-each (lambda (names) + (let ((operation (lambda () (char-set (car names))))) + (for-each (lambda (name) + (define-cset-sre-rule name operation)) + names))) + '((any) + (nonl) + (ascii) + (lower-case lower) + (upper-case upper) + (title-case title) + (alphabetic alpha) + (numeric num) + (alphanumeric alphanum alnum) + (punctuation punct) + (symbol) + (graphic graph) + (whitespace white space) + (printing print) + (control cntrl) + (hex-digit xdigit))) \ No newline at end of file diff --git a/tests/check.scm b/tests/check.scm index e6ba132b1..7fb7c70a9 100644 --- a/tests/check.scm +++ b/tests/check.scm @@ -104,6 +104,7 @@ USA. "runtime/test-regsexp" "runtime/test-rgxcmp" "runtime/test-sha3" + "runtime/test-srfi-115" "runtime/test-string" "runtime/test-string-normalization" "runtime/test-string-search" diff --git a/tests/runtime/test-srfi-115.scm b/tests/runtime/test-srfi-115.scm new file mode 100644 index 000000000..55155bdbc --- /dev/null +++ b/tests/runtime/test-srfi-115.scm @@ -0,0 +1,762 @@ +#| -*-Scheme-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, + 2017, 2018, 2019 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +|# + +;;;; Tests of regular s-expressions + +(declare (usual-integrations)) + +(define (match-strings-test pattern entries) + (if (equal? entries '(pattern-error)) + (lambda () + (assert-error (lambda () (regexp pattern)) + (list condition-type:compile-regexp))) + (let ((cr (regexp pattern))) + (map (lambda (p) + (if (string? p) + (%match-string-test pattern cr p + (list 0 (string-length p))) + (%match-string-test pattern cr (car p) (cadr p)))) + entries)))) + +(define (match-strings-test* patterns entries) + (append-map (lambda (pattern) + (match-strings-test pattern entries)) + patterns)) + +(define (match-string-test pattern string expected) + (%match-string-test pattern (regexp pattern) string expected)) + +(define (%match-string-test pattern cr string expected) + (let ((thunk (lambda () (translate-regexp-match (regexp-matches cr string))))) + (lambda () + (with-test-properties + (lambda () + (assert-equal (thunk) expected)) + 'expression `(match-string ',pattern ,string))))) + +(define (translate-regexp-match match) + (and match + (cons* (regexp-match-submatch-start match 0) + (regexp-match-submatch-end match 0) + (map cons + (cdr (regexp-match-keys match)) + (cdr (regexp-match->list match)))))) + +(define (multi-match-strings-test entries) + (map (lambda (entry) + (match-strings-test (car entry) (cdr entry))) + entries)) + +(define (search-strings-test pattern entries) + (if (equal? entries '(pattern-error)) + (lambda () + (assert-error (lambda () (regexp pattern)) + (list condition-type:compile-regexp))) + (let ((cr (regexp pattern))) + (map (lambda (p) + (%search-string-test pattern cr (car p) (cadr p))) + entries)))) + +(define (search-strings-test* patterns entries) + (append-map (lambda (pattern) + (search-strings-test pattern entries)) + patterns)) + +(define (search-string-test pattern string expected) + (%search-string-test pattern (regexp pattern) string expected)) + +(define (%search-string-test pattern cr string expected) + (let ((thunk (lambda () (translate-regexp-match (regexp-search cr string))))) + (lambda () + (with-test-properties + (lambda () + (assert-equal (thunk) expected)) + 'expression `(search-string ',pattern ,string))))) + +(define-test 'match-nonl + (match-strings-test 'nonl + '(("" #f) + ("a" (0 1)) + ("b" (0 1)) + ("\n" #f)))) + +(define-test 'search-nonl + (search-strings-test 'nonl + '(("" #f) + ("a" (0 1)) + ("b" (0 1)) + ("\n" #f) + ("ab" (0 1)) + ("\na" (1 2))))) + +(define-test 'match-*nonl + (match-strings-test '(* nonl) + '(("" (0 0)) + ("a" (0 1)) + ("ab" (0 2)) + ("abc" (0 3)) + ("ab\n" (0 2)) + ("a\nb" (0 1))))) + +(define-test 'search-+nonl + (search-strings-test '(+ nonl) + '(("" #f) + ("a" (0 1)) + ("ab" (0 2)) + ("abc" (0 3)) + ("ab\n" (0 2)) + ("a\nb" (0 1)) + ("\nab" (1 3))))) + +(define-test 'match-simple-seq + (match-strings-test* '((: "a" "b") (seq "a" "b")) + '(("ab" (0 2))))) + +(define-test 'search-simple-seq + (search-strings-test* '((: "a" "b") (seq "a" "b")) + '(("1914ab37" (4 6))))) + +(define-test 'match/repeat-equivalences-test + (let ((equivalents + (lambda (indices . patterns) + (map (let ((strings '("" "a" "b" "ab" "ba" "aab"))) + (lambda (pattern) + (match-strings-test + pattern + (map (lambda (string index) + (list string + (and index (list 0 index)))) + strings + indices)))) + patterns)))) + (list + (equivalents '(0 0 0 0 0 0) + "" + '(= 0 "a") + '(exactly 0 "a") + '(** 0 0 "a") + '(repeated 0 0 "a") + '(**? 0 0 "a") + '(non-greedy-repeated 0 0 "a") + '(: "" "") + '(seq "" "")) + + (equivalents '(#f 1 #f 1 #f 1) + "a" + '(= 1 "a") + '(exactly 1 "a") + '(** 1 1 "a") + '(repeated 1 1 "a") + '(**? 1 1 "a") + '(non-greedy-repeated 1 1 "a") + '(: "a" "") + '(seq "a" "") + '(: "" "a") + '(seq "" "a")) + + (equivalents '(#f #f #f #f #f 2) + "aa" + '(= 2 "a") + '(exactly 2 "a") + '(** 2 2 "a") + '(repeated 2 2 "a") + '(**? 2 2 "a") + '(non-greedy-repeated 2 2 "a") + '(: "a" "a") + '(seq "a" "a") + '(: "aa" "") + '(seq "aa" "") + '(: "" "aa") + '(seq "" "aa")) + + (equivalents '(0 1 0 1 0 2) + '(* "a") + '(zero-or-more "a") + '(>= 0 "a") + '(at-least 0 "a")) + + (equivalents '(0 0 0 0 0 0) + '(*? "a") + '(non-greedy-zero-or-more "a")) + + (equivalents '(#f 1 #f 1 #f 2) + '(+ "a") + '(one-or-more "a") + '(>= 1 "a") + '(at-least 1 "a") + '(: "a" (* "a")) + '(seq "a" (* "a"))) + + (equivalents '(#f 1 #f 1 #f 1) + '(: "a" (*? "a")) + '(seq "a" (*? "a"))) + + (equivalents '(0 1 0 1 0 1) + '(? "a") + '(optional "a") + '(** 0 1 "a") + '(repeated 0 1 "a")) + + (equivalents '(0 0 0 0 0 0) + '(?? "a") + '(non-greedy-optional "a") + '(**? 0 1 "a") + '(non-greedy-repeated 0 1 "a"))))) + +(define-test 'match-more-repeat-tests + (list + (match-string-test '(: (? "a") "a") "aab" '(0 2)) + (match-string-test '(: (? "a") "ab") "aab" '(0 3)) + + (match-string-test '(: (?? "a") "a") "aab" '(0 1)) + (match-string-test '(: (?? "a") "ab") "aab" '(0 3)) + + (match-string-test '(** 1 2 "a") "aab" '(0 2)) + (match-string-test '(: (** 1 2 "a") "b") "aab" '(0 3)) + + (match-string-test '(**? 1 2 "a") "aab" '(0 1)) + (match-string-test '(: (**? 1 2 "a") "b") "aab" '(0 3)) + + (match-string-test '(** 1 3 "a") "aaab" '(0 3)) + (match-string-test '(: (** 1 3 "a") "b") "aaab" '(0 4)) + + (match-string-test '(**? 1 3 "a") "aaab" '(0 1)) + (match-string-test '(: (**? 1 3 "a") "b") "aaab" '(0 4)) + + (match-string-test '(: (-> foo (? "a")) "a") "aab" '(0 2 (foo . "a"))) + (match-string-test '(: (-> foo (? "a")) "ab") "aab" '(0 3 (foo . "a"))) + (match-string-test '(: (-> foo (? "a")) "aab") "aab" '(0 3 (foo . ""))) + + (match-string-test '(: (-> foo (?? "a")) "a") "aab" '(0 1 (foo . ""))) + (match-string-test '(: (-> foo (?? "a")) "ab") "aab" '(0 3 (foo . "a"))) + (match-string-test '(: (-> foo (?? "a")) "aab") "aab" '(0 3 (foo . ""))) + + (match-string-test '(: (-> foo (* "a")) "b") "aab" '(0 3 (foo . "aa"))) + (match-string-test '(: (-> foo (* "a")) "ab") "aab" '(0 3 (foo . "a"))) + (match-string-test '(: (-> foo (* "a")) "aab") "aab" '(0 3 (foo . ""))) + + (match-string-test '(: (-> foo (*? "a")) "b") "aab" '(0 3 (foo . "aa"))) + (match-string-test '(: (-> foo (*? "a")) "ab") "aab" '(0 3 (foo . "a"))) + (match-string-test '(: (-> foo (*? "a")) "aab") "aab" '(0 3 (foo . ""))) + + )) + +(define-test 'search-repeat-tests + (list + (search-string-test '(: (? "a") "a") "aab" '(0 2)) + (search-string-test '(: (? "a") "a") "xaab" '(1 3)) + (search-string-test '(: (? "a") "ab") "aab" '(0 3)) + (search-string-test '(: (? "a") "ab") "xaab" '(1 4)) + + (search-string-test '(: (?? "a") "a") "aab" '(0 1)) + (search-string-test '(: (?? "a") "a") "xaab" '(1 2)) + (search-string-test '(: (?? "a") "ab") "aab" '(0 3)) + (search-string-test '(: (?? "a") "ab") "xaab" '(1 4)) + + (search-string-test '(** 1 2 "a") "aab" '(0 2)) + (search-string-test '(** 1 2 "a") "xaab" '(1 3)) + (search-string-test '(: (** 1 2 "a") "b") "aab" '(0 3)) + (search-string-test '(: (** 1 2 "a") "b") "xaab" '(1 4)) + + (search-string-test '(**? 1 2 "a") "aab" '(0 1)) + (search-string-test '(**? 1 2 "a") "xaab" '(1 2)) + (search-string-test '(: (**? 1 2 "a") "b") "aab" '(0 3)) + (search-string-test '(: (**? 1 2 "a") "b") "xaab" '(1 4)) + + (search-string-test '(** 1 3 "a") "aaab" '(0 3)) + (search-string-test '(** 1 3 "a") "xaaab" '(1 4)) + (search-string-test '(: (** 1 3 "a") "b") "aaab" '(0 4)) + (search-string-test '(: (** 1 3 "a") "b") "xaaab" '(1 5)) + + (search-string-test '(**? 1 3 "a") "aaab" '(0 1)) + (search-string-test '(**? 1 3 "a") "xaaab" '(1 2)) + (search-string-test '(: (**? 1 3 "a") "b") "aaab" '(0 4)) + (search-string-test '(: (**? 1 3 "a") "b") "xaaab" '(1 5)) + + (search-string-test '(: (-> foo (? "a")) "a") "aab" '(0 2 (foo . "a"))) + (search-string-test '(: (-> foo (? "a")) "a") "xaab" '(1 3 (foo . "a"))) + (search-string-test '(: (-> foo (? "a")) "ab") "aab" '(0 3 (foo . "a"))) + (search-string-test '(: (-> foo (? "a")) "ab") "xaab" '(1 4 (foo . "a"))) + (search-string-test '(: (-> foo (? "a")) "aab") "aab" '(0 3 (foo . ""))) + (search-string-test '(: (-> foo (? "a")) "aab") "xaab" '(1 4 (foo . ""))) + + (search-string-test '(: (-> foo (?? "a")) "a") "aab" '(0 1 (foo . ""))) + (search-string-test '(: (-> foo (?? "a")) "a") "xaab" '(1 2 (foo . ""))) + (search-string-test '(: (-> foo (?? "a")) "ab") "aab" '(0 3 (foo . "a"))) + (search-string-test '(: (-> foo (?? "a")) "ab") "xaab" '(1 4 (foo . "a"))) + (search-string-test '(: (-> foo (?? "a")) "aab") "aab" '(0 3 (foo . ""))) + (search-string-test '(: (-> foo (?? "a")) "aab") "xaab" '(1 4 (foo . ""))) + + (search-string-test '(: (-> foo (* "a")) "b") "aab" '(0 3 (foo . "aa"))) + (search-string-test '(: (-> foo (* "a")) "b") "xaab" '(1 4 (foo . "aa"))) + (search-string-test '(: (-> foo (* "a")) "ab") "aab" '(0 3 (foo . "a"))) + (search-string-test '(: (-> foo (* "a")) "ab") "xaab" '(1 4 (foo . "a"))) + (search-string-test '(: (-> foo (* "a")) "aab") "aab" '(0 3 (foo . ""))) + (search-string-test '(: (-> foo (* "a")) "aab") "xaab" '(1 4 (foo . ""))) + + (search-string-test '(: (-> foo (*? "a")) "b") "aab" '(0 3 (foo . "aa"))) + (search-string-test '(: (-> foo (*? "a")) "b") "xaab" '(1 4 (foo . "aa"))) + (search-string-test '(: (-> foo (*? "a")) "ab") "aab" '(0 3 (foo . "a"))) + (search-string-test '(: (-> foo (*? "a")) "ab") "xaab" '(1 4 (foo . "a"))) + (search-string-test '(: (-> foo (*? "a")) "aab") "aab" '(0 3 (foo . ""))) + (search-string-test '(: (-> foo (*? "a")) "aab") "xaab" '(1 4 (foo . ""))) + + )) + +(define-test 'match-palindromes + (list + (match-strings-test '(: (-> a nonl) + (-> b nonl) + nonl + (backref b) + (backref a)) + '(("radar" (0 5 (a . "r") (b . "a"))))) + (match-strings-test '(: bos + ($ (? nonl)) + ($ (? nonl)) + ($ (? nonl)) + ($ (? nonl)) + ($ (? nonl)) + ($ (? nonl)) + ($ (? nonl)) + ($ (? nonl)) + ($ (? nonl)) + (? nonl) + (backref 9) + (backref 8) + (backref 7) + (backref 6) + (backref 5) + (backref 4) + (backref 3) + (backref 2) + (backref 1) + eos) + '(("civic" (0 5 + (1 . "c") (2 . "i") (3 . "") (4 . "") + (5 . "") (6 . "") (7 . "") (8 . "") + (9 . ""))) + ("abba" (0 4 + (1 . "a") (2 . "b") (3 . "") (4 . "") + (5 . "") (6 . "") (7 . "") (8 . "") + (9 . ""))))) + (match-strings-test '(: bos + ($ (?? nonl)) + ($ (?? nonl)) + ($ (?? nonl)) + ($ (?? nonl)) + ($ (?? nonl)) + ($ (?? nonl)) + ($ (?? nonl)) + ($ (?? nonl)) + ($ (?? nonl)) + (?? nonl) + (backref 9) + (backref 8) + (backref 7) + (backref 6) + (backref 5) + (backref 4) + (backref 3) + (backref 2) + (backref 1) + eos) + '(("civic" (0 5 + (1 . "") (2 . "") (3 . "") (4 . "") + (5 . "") (6 . "") (7 . "") (8 . "c") + (9 . "i"))) + ("abba" (0 4 + (1 . "") (2 . "") (3 . "") (4 . "") + (5 . "") (6 . "") (7 . "") (8 . "a") + (9 . "b"))))) + )) + +;;; Ripped off from "grep/tests/bre.tests". +(define-test 'match-grep-bre + (multi-match-strings-test + '(((: "a" (: "b") "c") + "abc") + ((: "a" (:) "b") + "ab") + ((: (* "a") + (: bos "b" eos) + (* "c")) + "b") + ((:) + ("abc" (0 0))) + ((: "a" + (-> x (* "b")) + "c" + (backref x) + "d") + ("abbcbd" #f) + ("abbcbbd" (0 7 (x . "bb"))) + ("abbcbbbd" #f)) + ((: bos + (-> x nonl) + (backref x)) + ("abc" #f)) + ((: "a" + (* (-> x ("bc")) (backref x)) + "d") + ("abbccd" (0 6 (x . "b") (x . "c"))) + ("abbcbd" #f)) + ((: "a" + (* (* (-> x "b")) (backref x)) + "d") + ("abbbd" (0 5 (x . "b") (x . "b")))) + ((: (-> x "a") + (backref x) + "bcd") + ("aabcd" (0 5 (x . "a")))) + ((: (-> x "a") + (backref x) + "b" + (* "c") + "d") + ("aabcd" (0 5 (x . "a"))) + ("aabd" (0 4 (x . "a"))) + ("aabcccd" (0 7 (x . "a")))) + ((: (-> x "a") + (backref x) + "b" + (* "c") + ("ce") + "d") + ("aabcccd" (0 7 (x . "a")))) + ((: bos + (-> x "a") + (backref x) + "b" + (* "c") + "cd" + eos) + ("aabcccd" (0 7 (x . "a")))) + ((: (= 1 "a") "b") + "ab") + ((: (>= 1 "a") "b") + "ab") + ((: (** 1 2 "a") "b") + "aab") + ((: "a" (= 0 "b") "c") + "ac" + ("abc" #f)) + ((: "a" (** 0 1 "b") "c") + "ac" + "abc" + ("abbc" #f)) + ((: "a" (** 0 3 "b") "c") + "ac" + "abc" + "abbc" + "abbbc" + ("abbbbc" #f)) + ((: "a" (** 1 0 "b") "c") + pattern-error) + ((: "a" (** #f 1 "b") "c") + pattern-error) + ((: "a" (= 1 "b") "c") + ("ac" #f) + "abc") + ((: "a" (** 1 3 "b") "c") + ("ac" #f) + "abc") + ((: "a" (= 2 "b") "c") + ("abc" #f) + "abbc") + ((: "a" (** 2 4 "b") "c") + ("abcabbc" #f)) + ((: "a" + (? (-> x "b")) + "c" + (backref x) + "d") + "acd") + ((: (** 0 1 "-") + (+ ("0123456789")) + eos) + "-5")))) + +;;; Ripped off from "grep/tests/ere.tests". +(define-test 'match-grep-ere + (multi-match-strings-test + '(((or "abc" "de") + "abc") + ((or "a" "b" "c") + ("abc" (0 1))) + ((: "a" nonl "c") + "abc") + ((: "a" ("bc") "d") + "abd") + ((: "a" (* "b") "c") + "abc") + ((: "a" (+ "b") "c") + "abc") + ((: "a" (? "b") "c") + "abc") + ((: "a" ("b") "c") + "abc") + ((: "a" ("ab") "c") + "abc") + ((: "a" (~ ("ab")) "c") + ("abc" #f) + "adc") + ((: "a" alphabetic "c") + "abc" + "adc") + ((: "a" (+ numeric) "c") + "a019c") + ((: "A" (+ lower-case) "C") + "AabC") + ((: "a" (+ upper-case) "c") + "aBCc") + ((: "a" (= 20 ("ab"))) + "aaaaabaaaabaaaabaaaab") + ((: "a" + ("ab") ("ab") ("ab") ("ab") + ("ab") ("ab") ("ab") ("ab") + ("ab") ("ab") ("ab") ("ab") + ("ab") ("ab") ("ab") ("ab") + ("ab") ("ab") ("ab") ("ab")) + "aaaaabaaaabaaaabaaaab") + ((: "a" + ("ab") ("ab") ("ab") ("ab") + ("ab") ("ab") ("ab") ("ab") + ("ab") ("ab") ("ab") ("ab") + ("ab") ("ab") ("ab") ("ab") + ("ab") ("ab") ("ab") ("ab") + (or "wee" "week") + (or "knights" "night")) + "aaaaabaaaabaaaabaaaabweeknights") + ((: ("ab") ("cd") ("ef") ("gh") + ("ij") ("kl") ("mn")) + ("acegikmoq" (0 7))) + ((: ("ab") ("cd") ("ef") ("gh") + ("ij") ("kl") ("mn") ("op")) + ("acegikmoq" (0 8))) + ((: ("ab") ("cd") ("ef") ("gh") + ("ij") ("kl") ("mn") ("op") + ("qr")) + ("acegikmoqy" (0 9))) + ((: ("ab") ("cd") ("ef") ("gh") + ("ij") ("kl") ("mn") ("op") + ("q")) + ("acegikmoqy" (0 9))) + ("aBc" + ("Abc" #f)) + ((: "a" (* ("Bc")) "d") + "acBd" + "aBcd" + "aBcBcBd" + ("aBCd" #f) + ("abcd" #f) + ("abBCcd" #f)) + ((: "a" (~ ("b")) "c") + ("abc" #f) + "aBc" + "adc") + ((: ("a") "b" ("c")) + "abc") + ((: ("a") "b" ("a")) + "aba") + ((: ("abc") "b" ("abc")) + "abc") + ((: ("abc") "b" ("abd")) + ("abc" #f) + "abd") + ((: "a" (+ (? "b") "c") "d") + "accd") + ((* "a") + ("b" (0 0))) + ((: (or "wee" "week") (or "knights" "night")) + "weeknights") + ((: (or "we" "wee" "week" "frob") (or "knights" "night" "day")) + "weeknights") + ("abcdefghijklmnop" + "abcdefghijklmnop") + ("abcdefghijklmnopqrstuv" + "abcdefghijklmnopqrstuv") + ((or (: "CC" ("13") "1") + (: (= 21 "a") + ("23") + ("EO") + ("123") + ("Es") + ("12") + (= 15 "a") + "aa" + ("34") + ("EW") + "aaaaaaa" + ("X") + "a")) + "CC11")))) + +;; Ripped off from "grep/tests/khadafy.*". +(define-test 'match-grep-muammar-qaddafi + (match-strings-test + '(: "M" + ("ou") + (? "'") + "a" + (+ "m") + ("ae") + "r " + (* nonl) + (? ("AEae") "l" ("- ")) + ("GKQ") + (? "h") + (+ ("aeu")) + (+ ("dtz") (? ("dhz"))) + "af" + ("iy")) + '("Muammar Qaddafi" + "Mo'ammar Gadhafi" + "Muammar Kaddafi" + "Muammar Qadhafi" + "Moammar El Kadhafi" + "Muammar Gadafi" + "Mu'ammar al-Qadafi" + "Moamer El Kazzafi" + "Moamar al-Gaddafi" + "Mu'ammar Al Qathafi" + "Muammar Al Qathafi" + "Mo'ammar el-Gadhafi" + "Moamar El Kadhafi" + "Muammar al-Qadhafi" + "Mu'ammar al-Qadhdhafi" + "Mu'ammar Qadafi" + "Moamar Gaddafi" + "Mu'ammar Qadhdhafi" + "Muammar Khaddafi" + "Muammar al-Khaddafi" + "Mu'amar al-Kadafi" + "Muammar Ghaddafy" + "Muammar Ghadafi" + "Muammar Ghaddafi" + "Muamar Kaddafi" + "Muammar Quathafi" + "Muammar Gheddafi" + "Muamar Al-Kaddafi" + "Moammar Khadafy" + "Moammar Qudhafi" + "Mu'ammar al-Qaddafi" + "Mu'ammar Muhammad Abu Minyar al-Qadhafi"))) + +;; Ripped off from "grep/tests/spencer1.*". +(define-test 'match-grep-spencer + (multi-match-strings-test + '(("abc" + "abc" + ("xbc" #f) + ("axc" #f) + ("abx" #f)) + ((: "a" (* "b") "c") + "abc") + ((: "a" (* "b") "bc") + "abc" + "abbc" + "abbbbc") + ((: "a" (+ "b") "bc") + ("abc" #f) + "abbc" + "abbbbc" + ("abq" #f)) + ((: "a" (? "b") "bc") + "abc" + "abbc" + ("abbbbc" #f)) + ((: "a" (? "b") "c") + "abc") + ((: bos "abc" eos) + "abc" + ("abcc" #f)) + ((: bos "abc") + ("abcc" (0 3))) + (bos + ("abc" (0 0))) + (eos + "" + ("a" #f)) + ((: "a" nonl "c") + "abc" + "axc") + ((: "a" (* nonl) "c") + "axyzc" + ("axyzd" #f)) + ((: "a" ("bc") "d") + ("abc" #f) + "abd") + ((: "a" ("bcd") "e") + ("abd" #f) + "ace") + ((: "a" ("bcd")) + "ac" + ("aac" #f)) + ((: "a" (~ ("bc")) "d") + "aed" + ("abd" #f)) + ((: (+ "a") (+ "b") "c") + "abc" + "aabbc" + ("aabbabc" #f)) + ((* (* "a")) + ("-" (0 0))) + ((+ (* "a")) + ("-" (0 0))) + ((? (* "a")) + ("-" (0 0))) + ((* (or "a" (:))) + ("-" (0 0))) + ((* (or (* "a") "b")) + ("-" (0 0))) + ((* (or (+ "a") "b")) + "ab") + ((+ (or (+ "a") "b")) + "ab") + ((? (or (+ "a") "b")) + ("ba" (0 1)) + ("ab" (0 1))) + ((* (~ ("ab"))) + "cde") + ((: (* ("abc")) "d") + "abbbcd") + ((: (* ("abc")) "bcd") + "abcd") + ((or "a" "b" "c" "d" "e") + "e") + ((: (or "a" "b" "c" "d" "e") "f") + "ef") + ((: "abc" (* "d") "efg") + "abcdefg") + ("multiple words of text" + ("uh-uh" #f)) + ("multiple words" + ("multiple words, yeah" (0 14))) + ((: (-> x nonl nonl nonl nonl) + (* nonl) + (backref x)) + ("beriberi" (0 8 (x . "beri"))))))) \ No newline at end of file