("sfile" (runtime simple-file-ops))
("socket" (runtime socket))
("srfi-1" (runtime srfi-1))
+ ("srfi-115" (runtime regexp srfi-115))
("stack-sample" (runtime stack-sampler))
("stream" (runtime stream))
("string" (runtime string))
((runtime record) initialize-conditions!)
((runtime stream) initialize-conditions!)
((runtime regexp regsexp) initialize-conditions!)
+ ((runtime regexp srfi-115) initialize-conditions!)
;; System dependent stuff
(runtime os-primitives)
;; Floating-point environment -- needed by threads.
regsexp-search-string-forward
regsexp?))
+(define-package (runtime regexp srfi-115)
+ (files "srfi-115")
+ (parent (runtime regexp))
+ (export ()
+ condition-type:compile-regexp
+ cset-sre?
+ regexp
+ regexp-match->list
+ regexp-match-count
+ regexp-match-keys
+ regexp-match-submatch
+ regexp-match-submatch-end
+ regexp-match-submatch-start
+ regexp-match?
+ regexp-matches
+ regexp-matches?
+ regexp-search
+ regexp?
+ sre?))
+
(define-package (runtime regexp rules)
(files "regexp-rules")
(parent (runtime))
(parent (runtime))
(export (runtime regexp)
all-groups
+ group-end
group-key
+ group-start
group-value
insn:*
insn:**
insn:string
insn:string-end
insn:string-start
+ make-group
make-groups
make-source-position
make-string-position
--- /dev/null
+#| -*-Scheme-*-
+
+Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
+ 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
+ 2017, 2018, 2019 Massachusetts Institute of Technology
+
+This file is part of MIT/GNU Scheme.
+
+MIT/GNU Scheme is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.
+
+MIT/GNU Scheme is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with MIT/GNU Scheme; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
+USA.
+
+|#
+
+;;;; SRFI 115: Scheme Regular Expressions
+;;; package: (runtime regexp srfi-115)
+
+(declare (usual-integrations))
+\f
+(define (sre? object)
+ (and (or (find-cset-sre-rule object)
+ (find-sre-rule object))
+ #t))
+(register-predicate! sre? 'source-regexp)
+
+(define (cset-sre? object)
+ (and (find-cset-sre-rule object)
+ #t))
+(register-predicate! cset-sre? 'char-set-regexp)
+
+(define (compile-sre-top-level sre)
+ (%link-insn
+ (parameterize ((%input-pattern sre)
+ (%submatch-next 1))
+ (compile-sre sre))))
+
+(define %input-pattern (make-unsettable-parameter #f))
+(define %submatch-next (make-settable-parameter #f))
+
+(define (next-submatch-number)
+ (let ((n (%submatch-next)))
+ (%submatch-next (+ n 1))
+ n))
+
+(define (%link-insn insn)
+ (make-regexp
+ (insn
+ (lambda (position groups fail)
+ (declare (ignore fail))
+ (cons position (all-groups groups))))))
+
+(define-record-type <regexp>
+ (make-regexp impl)
+ regexp?
+ (impl regexp-impl))
+
+(define condition-type:compile-regexp)
+(define compile-error)
+(define (initialize-conditions!)
+ (set! condition-type:compile-regexp
+ (make-condition-type 'compile-sre condition-type:error
+ '(pattern element)
+ (lambda (condition port)
+ (write-string "Ill-formed regular s-expression: " port)
+ (write (access-condition condition 'element) port)
+ (write-string " from pattern: " port)
+ (write (access-condition condition 'pattern) port))))
+ (set! compile-error
+ (condition-signaller condition-type:compile-regexp
+ '(pattern element)
+ standard-error-handler))
+ unspecific)
+\f
+;;;; Procedures
+
+(define (regexp-matches re string #!optional start end)
+ (guarantee nfc-string? string 'regexp-matches)
+ (let* ((end (fix:end-index end (string-length string) 'regexp-matches))
+ (start (fix:start-index start end 'regexp-matches)))
+ (%regexp-match (regexp re) (make-string-position string start end))))
+
+(define (regexp-matches? re string #!optional start end)
+ (guarantee nfc-string? string 'regexp-matches?)
+ (let* ((end (fix:end-index end (string-length string) 'regexp-matches?))
+ (start (fix:start-index start end 'regexp-matches?)))
+ (%regexp-match (regexp re) (make-string-position string start end))))
+
+(define (regexp-search re string #!optional start end)
+ (guarantee nfc-string? string 'regexp-search)
+ (let* ((end (fix:end-index end (string-length string) 'regexp-search))
+ (start (fix:start-index start end 'regexp-search)))
+ (let ((cre (regexp re)))
+ (let loop ((position (make-string-position string start end)))
+ (or (%regexp-match cre position)
+ (and (next-char position)
+ (loop (next-position position))))))))
+
+(define (regexp re)
+ (if (regexp? re)
+ re
+ (compile-sre-top-level re)))
+
+(define (%regexp-match cre start-position)
+ (let ((result
+ ((regexp-impl cre) start-position (make-groups) (lambda () #f))))
+ (and result
+ (make-regexp-match (make-group 0 start-position (car result))
+ (cdr result)))))
+
+(define-record-type <regexp-match>
+ (make-regexp-match group0 groups)
+ regexp-match?
+ (group0 %regexp-match-group0)
+ (groups %regexp-match-groups))
+
+(define (regexp-match-count match)
+ (length (%regexp-match-groups match)))
+
+(define (%match-group match key caller)
+ (if (eqv? key 0)
+ (%regexp-match-group0 match)
+ (let ((group
+ (find (lambda (group)
+ (eqv? key (group-key group)))
+ (%regexp-match-groups match))))
+ (if (not group)
+ (error:bad-range-argument key caller))
+ group)))
+
+(define (regexp-match-submatch match key)
+ (group-value (%match-group match key 'regexp-match-submatch)))
+
+(define (regexp-match-submatch-start match key)
+ (group-start (%match-group match key 'regexp-match-submatch-start)))
+
+(define (regexp-match-submatch-end match key)
+ (group-end (%match-group match key 'regexp-match-submatch-end)))
+
+(define (regexp-match->list match)
+ (cons (group-value (%regexp-match-group0 match))
+ (map group-value (%regexp-match-groups match))))
+
+(define (regexp-match-keys match)
+ (cons (group-key (%regexp-match-group0 match))
+ (map group-key (%regexp-match-groups match))))
+\f
+;;;; Compiler rules
+
+(define sre-rules)
+(define sre-rewrite-rules)
+(define cset-sre-rules)
+(define cset-sre-rewrite-rules)
+(defer-boot-action 'regexp-rules
+ (lambda ()
+ (set! sre-rules (make-rules 'sre))
+ (set! sre-rewrite-rules (make-rules 'sre-rewrite))
+ (set! cset-sre-rules (make-rules 'cset-sre))
+ (set! cset-sre-rewrite-rules (make-rules 'cset-sre-rewrite))
+ unspecific))
+
+(define (rule-finder rules rewrite-rules)
+ (let ((matcher (rules-matcher rules))
+ (rewriter (rules-rewriter rewrite-rules)))
+ (lambda (object)
+ (matcher (rewriter object)))))
+
+(define-deferred-procedure find-sre-rule 'regexp-rules
+ (rule-finder sre-rules sre-rewrite-rules))
+
+(define-deferred-procedure find-cset-sre-rule 'regexp-rules
+ (rule-finder cset-sre-rules cset-sre-rewrite-rules))
+
+(define (pattern-rule-definer rules)
+ (let ((adder (rules-adder rules)))
+ (lambda (pattern operation #!optional predicate)
+ (adder
+ (if (pattern? pattern)
+ (pattern-rule pattern operation predicate)
+ (general-rule pattern predicate operation))))))
+
+(define-deferred-procedure define-sre-rule 'regexp-rules
+ (pattern-rule-definer sre-rules))
+
+(define-deferred-procedure define-sre-rewriter 'regexp-rules
+ (pattern-rule-definer sre-rewrite-rules))
+
+(define-deferred-procedure define-cset-sre-rule 'regexp-rules
+ (pattern-rule-definer cset-sre-rules))
+
+(define-deferred-procedure define-cset-sre-rewriter 'regexp-rules
+ (pattern-rule-definer cset-sre-rewrite-rules))
+
+(define (alias-rule-definer rules)
+ (let ((adder (rules-adder rules)))
+ (lambda (from to)
+ (guarantee interned-symbol? from)
+ (guarantee interned-symbol? to)
+ (adder
+ (pattern-rule `(,from . ,any-object?)
+ (lambda args (cons to args)))))))
+
+(define-deferred-procedure define-sre-alias 'regexp-rules
+ (alias-rule-definer sre-rewrite-rules))
+
+(define-deferred-procedure define-cset-sre-alias 'regexp-rules
+ (alias-rule-definer cset-sre-rewrite-rules))
+\f
+(define (compile-sre sre)
+ (cond ((find-cset-sre-rule sre)
+ => (lambda (rule)
+ (insn:char-set ((rule-operation rule) sre))))
+ ((find-sre-rule sre)
+ => (lambda (rule)
+ ((rule-operation rule) sre)))
+ (else
+ (compile-error (%input-pattern) sre))))
+
+(define (compile-sres sres)
+ (insn:seq (map-in-order compile-sre sres)))
+
+(define (compile-cset-sre cset-sre)
+ (cond ((find-cset-sre-rule cset-sre)
+ => (lambda (rule)
+ ((rule-operation rule) cset-sre)))
+ (else
+ (compile-error (%input-pattern) cset-sre))))
+
+(define (compile-cset-sres cset-sres)
+ (map-in-order compile-cset-sre cset-sres))
+
+(define (any-char? object)
+ (unicode-char? object))
+
+(define (min-arity? object)
+ (exact-nonnegative-integer? object))
+
+(define (max-arity? object)
+ (exact-nonnegative-integer? object))
+
+(define (backref-key? object)
+ (or (exact-positive-integer? object)
+ (interned-symbol? object)))
+\f
+;;;; <sre>
+
+(define-sre-rule "char"
+ (lambda (char) (insn:char char #f))
+ any-char?)
+
+(define-sre-rule "string"
+ (lambda (string) (insn:string string #f))
+ string?)
+
+(define-sre-rule `(* . ,sre?)
+ (lambda sres (insn:* (compile-sres sres))))
+(define-sre-alias 'zero-or-more '*)
+
+(define-sre-rule `(+ . ,sre?)
+ (lambda sres (insn:** 1 #f (compile-sres sres))))
+(define-sre-alias 'one-or-more '+)
+
+(define-sre-rule `(? . ,sre?)
+ (lambda sres (insn:? (compile-sres sres))))
+(define-sre-alias 'optional '?)
+
+(define-sre-rule `(= ,min-arity? . ,sre?)
+ (lambda (n . sres) (insn:** n n (compile-sres sres))))
+(define-sre-alias 'exactly '=)
+
+(define-sre-rule `(>= ,min-arity? . ,sre?)
+ (lambda (n . sres) (insn:** n #f (compile-sres sres))))
+(define-sre-alias 'at-least '>=)
+
+(define-sre-rule `(** ,min-arity? ,max-arity? . ,sre?)
+ (lambda (n m . sres) (insn:** n m (compile-sres sres)))
+ (lambda (n m . sres) (declare (ignore sres)) (<= n m)))
+(define-sre-alias 'repeated '**)
+
+(define-sre-rule `(|\|| . ,sre?)
+ (lambda sres (insn:alt (map-in-order compile-sre sres))))
+(define-sre-alias 'or '|\||)
+
+(define-sre-rule `(: . ,sre?)
+ (lambda sres (compile-sres sres)))
+(define-sre-alias 'seq ':)
+
+(define-sre-rule `($ . ,sre?)
+ (lambda sres
+ (insn:group (next-submatch-number)
+ (compile-sres sres))))
+(define-sre-alias 'submatch '$)
+
+(define-sre-rule `(-> ,interned-symbol? . ,sre?)
+ (lambda (key . sres)
+ (insn:group key (compile-sres sres))))
+(define-sre-alias 'submatch-named '->)
+
+(define-sre-rule 'bos (lambda () (insn:string-start)))
+(define-sre-rule 'eos (lambda () (insn:string-end)))
+(define-sre-rule 'bol (lambda () (insn:line-start)))
+(define-sre-rule 'eol (lambda () (insn:line-end)))
+
+(define-sre-rule `(?? . ,sre?)
+ (lambda sres (insn:?? (compile-sres sres))))
+(define-sre-alias 'non-greedy-optional '??)
+
+(define-sre-rule `(*? . ,sre?)
+ (lambda sres (insn:*? (compile-sres sres))))
+(define-sre-alias 'non-greedy-zero-or-more '*?)
+
+(define-sre-rule `(**? ,min-arity? ,max-arity? . ,sre?)
+ (lambda (n m . sres) (insn:**? n m (compile-sres sres)))
+ (lambda (n m . sres) (declare (ignore sres)) (<= n m)))
+(define-sre-alias 'non-greedy-repeated '**?)
+
+(define-sre-rule `(backref ,backref-key?)
+ (lambda (key) (insn:group-ref key)))
+\f
+;;;; <cset-sre>
+
+(define-cset-sre-rule "char"
+ (lambda (char) (char-set char))
+ any-char?)
+
+(define-cset-sre-rule "string"
+ (lambda (string) (char-set string))
+ (lambda (object)
+ (and (string? object)
+ (fix:= 1 (string-length object)))))
+
+(define-cset-sre-rule "char-set"
+ (lambda (cs) cs)
+ (lambda (object) (char-set? object)))
+
+(define-cset-sre-rule `(,string?)
+ (lambda (s) (char-set s)))
+
+(define-cset-sre-rule `(char-set ,string?)
+ (lambda (s) (char-set s)))
+
+(define (range-spec? object)
+ (or (unicode-char? object)
+ (and (string? object)
+ (let ((end (string-length object)))
+ (and (even? end)
+ (let loop ((i 0))
+ (if (fix:< i end)
+ (and (char<=? (string-ref object i)
+ (string-ref object (fix:+ i 1)))
+ (loop (fix:+ i 2)))
+ #t)))))))
+
+(define (range-spec->ranges rs)
+ (if (unicode-char? rs)
+ (list rs)
+ (let ((end (string-length rs)))
+ (let loop ((i 0) (ranges '()))
+ (if (fix:< i end)
+ (loop (fix:+ i 2)
+ (cons (list (char->integer (string-ref rs i))
+ (char->integer (string-ref rs (fix:+ i 1))))
+ ranges))
+ ranges)))))
+
+(define-cset-sre-rule `(/ . ,range-spec?)
+ (lambda rs (char-set* (append-map range-spec->ranges rs))))
+(define-cset-sre-alias 'char-range '/)
+
+(define-cset-sre-rule `(or . ,cset-sre?)
+ (lambda cset-sres (char-set-union* (compile-cset-sres cset-sres))))
+(define-cset-sre-alias '|\|| 'or)
+
+(define-cset-sre-rule `(and . ,cset-sre?)
+ (lambda cset-sres (char-set-intersection* (compile-cset-sres cset-sres))))
+(define-cset-sre-alias '& 'and)
+
+(define-cset-sre-rule `(- . ,cset-sre?)
+ (lambda cset-sres (apply char-set-difference (compile-cset-sres cset-sres))))
+(define-cset-sre-alias 'difference '-)
+
+(define-cset-sre-rule `(~ . ,cset-sre?)
+ (lambda cset-sres
+ (char-set-difference char-set:unicode
+ (char-set-union* (compile-cset-sres cset-sres)))))
+(define-cset-sre-alias 'complement '~)
+
+(for-each (lambda (names)
+ (let ((operation (lambda () (char-set (car names)))))
+ (for-each (lambda (name)
+ (define-cset-sre-rule name operation))
+ names)))
+ '((any)
+ (nonl)
+ (ascii)
+ (lower-case lower)
+ (upper-case upper)
+ (title-case title)
+ (alphabetic alpha)
+ (numeric num)
+ (alphanumeric alphanum alnum)
+ (punctuation punct)
+ (symbol)
+ (graphic graph)
+ (whitespace white space)
+ (printing print)
+ (control cntrl)
+ (hex-digit xdigit)))
\ No newline at end of file
"runtime/test-regsexp"
"runtime/test-rgxcmp"
"runtime/test-sha3"
+ "runtime/test-srfi-115"
"runtime/test-string"
"runtime/test-string-normalization"
"runtime/test-string-search"
--- /dev/null
+#| -*-Scheme-*-
+
+Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
+ 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
+ 2017, 2018, 2019 Massachusetts Institute of Technology
+
+This file is part of MIT/GNU Scheme.
+
+MIT/GNU Scheme is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.
+
+MIT/GNU Scheme is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with MIT/GNU Scheme; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
+USA.
+
+|#
+
+;;;; Tests of regular s-expressions
+
+(declare (usual-integrations))
+\f
+(define (match-strings-test pattern entries)
+ (if (equal? entries '(pattern-error))
+ (lambda ()
+ (assert-error (lambda () (regexp pattern))
+ (list condition-type:compile-regexp)))
+ (let ((cr (regexp pattern)))
+ (map (lambda (p)
+ (if (string? p)
+ (%match-string-test pattern cr p
+ (list 0 (string-length p)))
+ (%match-string-test pattern cr (car p) (cadr p))))
+ entries))))
+
+(define (match-strings-test* patterns entries)
+ (append-map (lambda (pattern)
+ (match-strings-test pattern entries))
+ patterns))
+
+(define (match-string-test pattern string expected)
+ (%match-string-test pattern (regexp pattern) string expected))
+
+(define (%match-string-test pattern cr string expected)
+ (let ((thunk (lambda () (translate-regexp-match (regexp-matches cr string)))))
+ (lambda ()
+ (with-test-properties
+ (lambda ()
+ (assert-equal (thunk) expected))
+ 'expression `(match-string ',pattern ,string)))))
+
+(define (translate-regexp-match match)
+ (and match
+ (cons* (regexp-match-submatch-start match 0)
+ (regexp-match-submatch-end match 0)
+ (map cons
+ (cdr (regexp-match-keys match))
+ (cdr (regexp-match->list match))))))
+
+(define (multi-match-strings-test entries)
+ (map (lambda (entry)
+ (match-strings-test (car entry) (cdr entry)))
+ entries))
+
+(define (search-strings-test pattern entries)
+ (if (equal? entries '(pattern-error))
+ (lambda ()
+ (assert-error (lambda () (regexp pattern))
+ (list condition-type:compile-regexp)))
+ (let ((cr (regexp pattern)))
+ (map (lambda (p)
+ (%search-string-test pattern cr (car p) (cadr p)))
+ entries))))
+
+(define (search-strings-test* patterns entries)
+ (append-map (lambda (pattern)
+ (search-strings-test pattern entries))
+ patterns))
+
+(define (search-string-test pattern string expected)
+ (%search-string-test pattern (regexp pattern) string expected))
+
+(define (%search-string-test pattern cr string expected)
+ (let ((thunk (lambda () (translate-regexp-match (regexp-search cr string)))))
+ (lambda ()
+ (with-test-properties
+ (lambda ()
+ (assert-equal (thunk) expected))
+ 'expression `(search-string ',pattern ,string)))))
+\f
+(define-test 'match-nonl
+ (match-strings-test 'nonl
+ '(("" #f)
+ ("a" (0 1))
+ ("b" (0 1))
+ ("\n" #f))))
+
+(define-test 'search-nonl
+ (search-strings-test 'nonl
+ '(("" #f)
+ ("a" (0 1))
+ ("b" (0 1))
+ ("\n" #f)
+ ("ab" (0 1))
+ ("\na" (1 2)))))
+
+(define-test 'match-*nonl
+ (match-strings-test '(* nonl)
+ '(("" (0 0))
+ ("a" (0 1))
+ ("ab" (0 2))
+ ("abc" (0 3))
+ ("ab\n" (0 2))
+ ("a\nb" (0 1)))))
+
+(define-test 'search-+nonl
+ (search-strings-test '(+ nonl)
+ '(("" #f)
+ ("a" (0 1))
+ ("ab" (0 2))
+ ("abc" (0 3))
+ ("ab\n" (0 2))
+ ("a\nb" (0 1))
+ ("\nab" (1 3)))))
+
+(define-test 'match-simple-seq
+ (match-strings-test* '((: "a" "b") (seq "a" "b"))
+ '(("ab" (0 2)))))
+
+(define-test 'search-simple-seq
+ (search-strings-test* '((: "a" "b") (seq "a" "b"))
+ '(("1914ab37" (4 6)))))
+\f
+(define-test 'match/repeat-equivalences-test
+ (let ((equivalents
+ (lambda (indices . patterns)
+ (map (let ((strings '("" "a" "b" "ab" "ba" "aab")))
+ (lambda (pattern)
+ (match-strings-test
+ pattern
+ (map (lambda (string index)
+ (list string
+ (and index (list 0 index))))
+ strings
+ indices))))
+ patterns))))
+ (list
+ (equivalents '(0 0 0 0 0 0)
+ ""
+ '(= 0 "a")
+ '(exactly 0 "a")
+ '(** 0 0 "a")
+ '(repeated 0 0 "a")
+ '(**? 0 0 "a")
+ '(non-greedy-repeated 0 0 "a")
+ '(: "" "")
+ '(seq "" ""))
+
+ (equivalents '(#f 1 #f 1 #f 1)
+ "a"
+ '(= 1 "a")
+ '(exactly 1 "a")
+ '(** 1 1 "a")
+ '(repeated 1 1 "a")
+ '(**? 1 1 "a")
+ '(non-greedy-repeated 1 1 "a")
+ '(: "a" "")
+ '(seq "a" "")
+ '(: "" "a")
+ '(seq "" "a"))
+
+ (equivalents '(#f #f #f #f #f 2)
+ "aa"
+ '(= 2 "a")
+ '(exactly 2 "a")
+ '(** 2 2 "a")
+ '(repeated 2 2 "a")
+ '(**? 2 2 "a")
+ '(non-greedy-repeated 2 2 "a")
+ '(: "a" "a")
+ '(seq "a" "a")
+ '(: "aa" "")
+ '(seq "aa" "")
+ '(: "" "aa")
+ '(seq "" "aa"))
+
+ (equivalents '(0 1 0 1 0 2)
+ '(* "a")
+ '(zero-or-more "a")
+ '(>= 0 "a")
+ '(at-least 0 "a"))
+
+ (equivalents '(0 0 0 0 0 0)
+ '(*? "a")
+ '(non-greedy-zero-or-more "a"))
+
+ (equivalents '(#f 1 #f 1 #f 2)
+ '(+ "a")
+ '(one-or-more "a")
+ '(>= 1 "a")
+ '(at-least 1 "a")
+ '(: "a" (* "a"))
+ '(seq "a" (* "a")))
+
+ (equivalents '(#f 1 #f 1 #f 1)
+ '(: "a" (*? "a"))
+ '(seq "a" (*? "a")))
+
+ (equivalents '(0 1 0 1 0 1)
+ '(? "a")
+ '(optional "a")
+ '(** 0 1 "a")
+ '(repeated 0 1 "a"))
+
+ (equivalents '(0 0 0 0 0 0)
+ '(?? "a")
+ '(non-greedy-optional "a")
+ '(**? 0 1 "a")
+ '(non-greedy-repeated 0 1 "a")))))
+\f
+(define-test 'match-more-repeat-tests
+ (list
+ (match-string-test '(: (? "a") "a") "aab" '(0 2))
+ (match-string-test '(: (? "a") "ab") "aab" '(0 3))
+
+ (match-string-test '(: (?? "a") "a") "aab" '(0 1))
+ (match-string-test '(: (?? "a") "ab") "aab" '(0 3))
+
+ (match-string-test '(** 1 2 "a") "aab" '(0 2))
+ (match-string-test '(: (** 1 2 "a") "b") "aab" '(0 3))
+
+ (match-string-test '(**? 1 2 "a") "aab" '(0 1))
+ (match-string-test '(: (**? 1 2 "a") "b") "aab" '(0 3))
+
+ (match-string-test '(** 1 3 "a") "aaab" '(0 3))
+ (match-string-test '(: (** 1 3 "a") "b") "aaab" '(0 4))
+
+ (match-string-test '(**? 1 3 "a") "aaab" '(0 1))
+ (match-string-test '(: (**? 1 3 "a") "b") "aaab" '(0 4))
+
+ (match-string-test '(: (-> foo (? "a")) "a") "aab" '(0 2 (foo . "a")))
+ (match-string-test '(: (-> foo (? "a")) "ab") "aab" '(0 3 (foo . "a")))
+ (match-string-test '(: (-> foo (? "a")) "aab") "aab" '(0 3 (foo . "")))
+
+ (match-string-test '(: (-> foo (?? "a")) "a") "aab" '(0 1 (foo . "")))
+ (match-string-test '(: (-> foo (?? "a")) "ab") "aab" '(0 3 (foo . "a")))
+ (match-string-test '(: (-> foo (?? "a")) "aab") "aab" '(0 3 (foo . "")))
+
+ (match-string-test '(: (-> foo (* "a")) "b") "aab" '(0 3 (foo . "aa")))
+ (match-string-test '(: (-> foo (* "a")) "ab") "aab" '(0 3 (foo . "a")))
+ (match-string-test '(: (-> foo (* "a")) "aab") "aab" '(0 3 (foo . "")))
+
+ (match-string-test '(: (-> foo (*? "a")) "b") "aab" '(0 3 (foo . "aa")))
+ (match-string-test '(: (-> foo (*? "a")) "ab") "aab" '(0 3 (foo . "a")))
+ (match-string-test '(: (-> foo (*? "a")) "aab") "aab" '(0 3 (foo . "")))
+
+ ))
+\f
+(define-test 'search-repeat-tests
+ (list
+ (search-string-test '(: (? "a") "a") "aab" '(0 2))
+ (search-string-test '(: (? "a") "a") "xaab" '(1 3))
+ (search-string-test '(: (? "a") "ab") "aab" '(0 3))
+ (search-string-test '(: (? "a") "ab") "xaab" '(1 4))
+
+ (search-string-test '(: (?? "a") "a") "aab" '(0 1))
+ (search-string-test '(: (?? "a") "a") "xaab" '(1 2))
+ (search-string-test '(: (?? "a") "ab") "aab" '(0 3))
+ (search-string-test '(: (?? "a") "ab") "xaab" '(1 4))
+
+ (search-string-test '(** 1 2 "a") "aab" '(0 2))
+ (search-string-test '(** 1 2 "a") "xaab" '(1 3))
+ (search-string-test '(: (** 1 2 "a") "b") "aab" '(0 3))
+ (search-string-test '(: (** 1 2 "a") "b") "xaab" '(1 4))
+
+ (search-string-test '(**? 1 2 "a") "aab" '(0 1))
+ (search-string-test '(**? 1 2 "a") "xaab" '(1 2))
+ (search-string-test '(: (**? 1 2 "a") "b") "aab" '(0 3))
+ (search-string-test '(: (**? 1 2 "a") "b") "xaab" '(1 4))
+
+ (search-string-test '(** 1 3 "a") "aaab" '(0 3))
+ (search-string-test '(** 1 3 "a") "xaaab" '(1 4))
+ (search-string-test '(: (** 1 3 "a") "b") "aaab" '(0 4))
+ (search-string-test '(: (** 1 3 "a") "b") "xaaab" '(1 5))
+
+ (search-string-test '(**? 1 3 "a") "aaab" '(0 1))
+ (search-string-test '(**? 1 3 "a") "xaaab" '(1 2))
+ (search-string-test '(: (**? 1 3 "a") "b") "aaab" '(0 4))
+ (search-string-test '(: (**? 1 3 "a") "b") "xaaab" '(1 5))
+
+ (search-string-test '(: (-> foo (? "a")) "a") "aab" '(0 2 (foo . "a")))
+ (search-string-test '(: (-> foo (? "a")) "a") "xaab" '(1 3 (foo . "a")))
+ (search-string-test '(: (-> foo (? "a")) "ab") "aab" '(0 3 (foo . "a")))
+ (search-string-test '(: (-> foo (? "a")) "ab") "xaab" '(1 4 (foo . "a")))
+ (search-string-test '(: (-> foo (? "a")) "aab") "aab" '(0 3 (foo . "")))
+ (search-string-test '(: (-> foo (? "a")) "aab") "xaab" '(1 4 (foo . "")))
+
+ (search-string-test '(: (-> foo (?? "a")) "a") "aab" '(0 1 (foo . "")))
+ (search-string-test '(: (-> foo (?? "a")) "a") "xaab" '(1 2 (foo . "")))
+ (search-string-test '(: (-> foo (?? "a")) "ab") "aab" '(0 3 (foo . "a")))
+ (search-string-test '(: (-> foo (?? "a")) "ab") "xaab" '(1 4 (foo . "a")))
+ (search-string-test '(: (-> foo (?? "a")) "aab") "aab" '(0 3 (foo . "")))
+ (search-string-test '(: (-> foo (?? "a")) "aab") "xaab" '(1 4 (foo . "")))
+
+ (search-string-test '(: (-> foo (* "a")) "b") "aab" '(0 3 (foo . "aa")))
+ (search-string-test '(: (-> foo (* "a")) "b") "xaab" '(1 4 (foo . "aa")))
+ (search-string-test '(: (-> foo (* "a")) "ab") "aab" '(0 3 (foo . "a")))
+ (search-string-test '(: (-> foo (* "a")) "ab") "xaab" '(1 4 (foo . "a")))
+ (search-string-test '(: (-> foo (* "a")) "aab") "aab" '(0 3 (foo . "")))
+ (search-string-test '(: (-> foo (* "a")) "aab") "xaab" '(1 4 (foo . "")))
+
+ (search-string-test '(: (-> foo (*? "a")) "b") "aab" '(0 3 (foo . "aa")))
+ (search-string-test '(: (-> foo (*? "a")) "b") "xaab" '(1 4 (foo . "aa")))
+ (search-string-test '(: (-> foo (*? "a")) "ab") "aab" '(0 3 (foo . "a")))
+ (search-string-test '(: (-> foo (*? "a")) "ab") "xaab" '(1 4 (foo . "a")))
+ (search-string-test '(: (-> foo (*? "a")) "aab") "aab" '(0 3 (foo . "")))
+ (search-string-test '(: (-> foo (*? "a")) "aab") "xaab" '(1 4 (foo . "")))
+
+ ))
+\f
+(define-test 'match-palindromes
+ (list
+ (match-strings-test '(: (-> a nonl)
+ (-> b nonl)
+ nonl
+ (backref b)
+ (backref a))
+ '(("radar" (0 5 (a . "r") (b . "a")))))
+ (match-strings-test '(: bos
+ ($ (? nonl))
+ ($ (? nonl))
+ ($ (? nonl))
+ ($ (? nonl))
+ ($ (? nonl))
+ ($ (? nonl))
+ ($ (? nonl))
+ ($ (? nonl))
+ ($ (? nonl))
+ (? nonl)
+ (backref 9)
+ (backref 8)
+ (backref 7)
+ (backref 6)
+ (backref 5)
+ (backref 4)
+ (backref 3)
+ (backref 2)
+ (backref 1)
+ eos)
+ '(("civic" (0 5
+ (1 . "c") (2 . "i") (3 . "") (4 . "")
+ (5 . "") (6 . "") (7 . "") (8 . "")
+ (9 . "")))
+ ("abba" (0 4
+ (1 . "a") (2 . "b") (3 . "") (4 . "")
+ (5 . "") (6 . "") (7 . "") (8 . "")
+ (9 . "")))))
+ (match-strings-test '(: bos
+ ($ (?? nonl))
+ ($ (?? nonl))
+ ($ (?? nonl))
+ ($ (?? nonl))
+ ($ (?? nonl))
+ ($ (?? nonl))
+ ($ (?? nonl))
+ ($ (?? nonl))
+ ($ (?? nonl))
+ (?? nonl)
+ (backref 9)
+ (backref 8)
+ (backref 7)
+ (backref 6)
+ (backref 5)
+ (backref 4)
+ (backref 3)
+ (backref 2)
+ (backref 1)
+ eos)
+ '(("civic" (0 5
+ (1 . "") (2 . "") (3 . "") (4 . "")
+ (5 . "") (6 . "") (7 . "") (8 . "c")
+ (9 . "i")))
+ ("abba" (0 4
+ (1 . "") (2 . "") (3 . "") (4 . "")
+ (5 . "") (6 . "") (7 . "") (8 . "a")
+ (9 . "b")))))
+ ))
+\f
+;;; Ripped off from "grep/tests/bre.tests".
+(define-test 'match-grep-bre
+ (multi-match-strings-test
+ '(((: "a" (: "b") "c")
+ "abc")
+ ((: "a" (:) "b")
+ "ab")
+ ((: (* "a")
+ (: bos "b" eos)
+ (* "c"))
+ "b")
+ ((:)
+ ("abc" (0 0)))
+ ((: "a"
+ (-> x (* "b"))
+ "c"
+ (backref x)
+ "d")
+ ("abbcbd" #f)
+ ("abbcbbd" (0 7 (x . "bb")))
+ ("abbcbbbd" #f))
+ ((: bos
+ (-> x nonl)
+ (backref x))
+ ("abc" #f))
+ ((: "a"
+ (* (-> x ("bc")) (backref x))
+ "d")
+ ("abbccd" (0 6 (x . "b") (x . "c")))
+ ("abbcbd" #f))
+ ((: "a"
+ (* (* (-> x "b")) (backref x))
+ "d")
+ ("abbbd" (0 5 (x . "b") (x . "b"))))
+ ((: (-> x "a")
+ (backref x)
+ "bcd")
+ ("aabcd" (0 5 (x . "a"))))
+ ((: (-> x "a")
+ (backref x)
+ "b"
+ (* "c")
+ "d")
+ ("aabcd" (0 5 (x . "a")))
+ ("aabd" (0 4 (x . "a")))
+ ("aabcccd" (0 7 (x . "a"))))
+ ((: (-> x "a")
+ (backref x)
+ "b"
+ (* "c")
+ ("ce")
+ "d")
+ ("aabcccd" (0 7 (x . "a"))))
+ ((: bos
+ (-> x "a")
+ (backref x)
+ "b"
+ (* "c")
+ "cd"
+ eos)
+ ("aabcccd" (0 7 (x . "a"))))
+ ((: (= 1 "a") "b")
+ "ab")
+ ((: (>= 1 "a") "b")
+ "ab")
+ ((: (** 1 2 "a") "b")
+ "aab")
+ ((: "a" (= 0 "b") "c")
+ "ac"
+ ("abc" #f))
+ ((: "a" (** 0 1 "b") "c")
+ "ac"
+ "abc"
+ ("abbc" #f))
+ ((: "a" (** 0 3 "b") "c")
+ "ac"
+ "abc"
+ "abbc"
+ "abbbc"
+ ("abbbbc" #f))
+ ((: "a" (** 1 0 "b") "c")
+ pattern-error)
+ ((: "a" (** #f 1 "b") "c")
+ pattern-error)
+ ((: "a" (= 1 "b") "c")
+ ("ac" #f)
+ "abc")
+ ((: "a" (** 1 3 "b") "c")
+ ("ac" #f)
+ "abc")
+ ((: "a" (= 2 "b") "c")
+ ("abc" #f)
+ "abbc")
+ ((: "a" (** 2 4 "b") "c")
+ ("abcabbc" #f))
+ ((: "a"
+ (? (-> x "b"))
+ "c"
+ (backref x)
+ "d")
+ "acd")
+ ((: (** 0 1 "-")
+ (+ ("0123456789"))
+ eos)
+ "-5"))))
+\f
+;;; Ripped off from "grep/tests/ere.tests".
+(define-test 'match-grep-ere
+ (multi-match-strings-test
+ '(((or "abc" "de")
+ "abc")
+ ((or "a" "b" "c")
+ ("abc" (0 1)))
+ ((: "a" nonl "c")
+ "abc")
+ ((: "a" ("bc") "d")
+ "abd")
+ ((: "a" (* "b") "c")
+ "abc")
+ ((: "a" (+ "b") "c")
+ "abc")
+ ((: "a" (? "b") "c")
+ "abc")
+ ((: "a" ("b") "c")
+ "abc")
+ ((: "a" ("ab") "c")
+ "abc")
+ ((: "a" (~ ("ab")) "c")
+ ("abc" #f)
+ "adc")
+ ((: "a" alphabetic "c")
+ "abc"
+ "adc")
+ ((: "a" (+ numeric) "c")
+ "a019c")
+ ((: "A" (+ lower-case) "C")
+ "AabC")
+ ((: "a" (+ upper-case) "c")
+ "aBCc")
+ ((: "a" (= 20 ("ab")))
+ "aaaaabaaaabaaaabaaaab")
+ ((: "a"
+ ("ab") ("ab") ("ab") ("ab")
+ ("ab") ("ab") ("ab") ("ab")
+ ("ab") ("ab") ("ab") ("ab")
+ ("ab") ("ab") ("ab") ("ab")
+ ("ab") ("ab") ("ab") ("ab"))
+ "aaaaabaaaabaaaabaaaab")
+ ((: "a"
+ ("ab") ("ab") ("ab") ("ab")
+ ("ab") ("ab") ("ab") ("ab")
+ ("ab") ("ab") ("ab") ("ab")
+ ("ab") ("ab") ("ab") ("ab")
+ ("ab") ("ab") ("ab") ("ab")
+ (or "wee" "week")
+ (or "knights" "night"))
+ "aaaaabaaaabaaaabaaaabweeknights")
+ ((: ("ab") ("cd") ("ef") ("gh")
+ ("ij") ("kl") ("mn"))
+ ("acegikmoq" (0 7)))
+ ((: ("ab") ("cd") ("ef") ("gh")
+ ("ij") ("kl") ("mn") ("op"))
+ ("acegikmoq" (0 8)))
+ ((: ("ab") ("cd") ("ef") ("gh")
+ ("ij") ("kl") ("mn") ("op")
+ ("qr"))
+ ("acegikmoqy" (0 9)))
+ ((: ("ab") ("cd") ("ef") ("gh")
+ ("ij") ("kl") ("mn") ("op")
+ ("q"))
+ ("acegikmoqy" (0 9)))
+ ("aBc"
+ ("Abc" #f))
+ ((: "a" (* ("Bc")) "d")
+ "acBd"
+ "aBcd"
+ "aBcBcBd"
+ ("aBCd" #f)
+ ("abcd" #f)
+ ("abBCcd" #f))
+ ((: "a" (~ ("b")) "c")
+ ("abc" #f)
+ "aBc"
+ "adc")
+ ((: ("a") "b" ("c"))
+ "abc")
+ ((: ("a") "b" ("a"))
+ "aba")
+ ((: ("abc") "b" ("abc"))
+ "abc")
+ ((: ("abc") "b" ("abd"))
+ ("abc" #f)
+ "abd")
+ ((: "a" (+ (? "b") "c") "d")
+ "accd")
+ ((* "a")
+ ("b" (0 0)))
+ ((: (or "wee" "week") (or "knights" "night"))
+ "weeknights")
+ ((: (or "we" "wee" "week" "frob") (or "knights" "night" "day"))
+ "weeknights")
+ ("abcdefghijklmnop"
+ "abcdefghijklmnop")
+ ("abcdefghijklmnopqrstuv"
+ "abcdefghijklmnopqrstuv")
+ ((or (: "CC" ("13") "1")
+ (: (= 21 "a")
+ ("23")
+ ("EO")
+ ("123")
+ ("Es")
+ ("12")
+ (= 15 "a")
+ "aa"
+ ("34")
+ ("EW")
+ "aaaaaaa"
+ ("X")
+ "a"))
+ "CC11"))))
+\f
+;; Ripped off from "grep/tests/khadafy.*".
+(define-test 'match-grep-muammar-qaddafi
+ (match-strings-test
+ '(: "M"
+ ("ou")
+ (? "'")
+ "a"
+ (+ "m")
+ ("ae")
+ "r "
+ (* nonl)
+ (? ("AEae") "l" ("- "))
+ ("GKQ")
+ (? "h")
+ (+ ("aeu"))
+ (+ ("dtz") (? ("dhz")))
+ "af"
+ ("iy"))
+ '("Muammar Qaddafi"
+ "Mo'ammar Gadhafi"
+ "Muammar Kaddafi"
+ "Muammar Qadhafi"
+ "Moammar El Kadhafi"
+ "Muammar Gadafi"
+ "Mu'ammar al-Qadafi"
+ "Moamer El Kazzafi"
+ "Moamar al-Gaddafi"
+ "Mu'ammar Al Qathafi"
+ "Muammar Al Qathafi"
+ "Mo'ammar el-Gadhafi"
+ "Moamar El Kadhafi"
+ "Muammar al-Qadhafi"
+ "Mu'ammar al-Qadhdhafi"
+ "Mu'ammar Qadafi"
+ "Moamar Gaddafi"
+ "Mu'ammar Qadhdhafi"
+ "Muammar Khaddafi"
+ "Muammar al-Khaddafi"
+ "Mu'amar al-Kadafi"
+ "Muammar Ghaddafy"
+ "Muammar Ghadafi"
+ "Muammar Ghaddafi"
+ "Muamar Kaddafi"
+ "Muammar Quathafi"
+ "Muammar Gheddafi"
+ "Muamar Al-Kaddafi"
+ "Moammar Khadafy"
+ "Moammar Qudhafi"
+ "Mu'ammar al-Qaddafi"
+ "Mu'ammar Muhammad Abu Minyar al-Qadhafi")))
+\f
+;; Ripped off from "grep/tests/spencer1.*".
+(define-test 'match-grep-spencer
+ (multi-match-strings-test
+ '(("abc"
+ "abc"
+ ("xbc" #f)
+ ("axc" #f)
+ ("abx" #f))
+ ((: "a" (* "b") "c")
+ "abc")
+ ((: "a" (* "b") "bc")
+ "abc"
+ "abbc"
+ "abbbbc")
+ ((: "a" (+ "b") "bc")
+ ("abc" #f)
+ "abbc"
+ "abbbbc"
+ ("abq" #f))
+ ((: "a" (? "b") "bc")
+ "abc"
+ "abbc"
+ ("abbbbc" #f))
+ ((: "a" (? "b") "c")
+ "abc")
+ ((: bos "abc" eos)
+ "abc"
+ ("abcc" #f))
+ ((: bos "abc")
+ ("abcc" (0 3)))
+ (bos
+ ("abc" (0 0)))
+ (eos
+ ""
+ ("a" #f))
+ ((: "a" nonl "c")
+ "abc"
+ "axc")
+ ((: "a" (* nonl) "c")
+ "axyzc"
+ ("axyzd" #f))
+ ((: "a" ("bc") "d")
+ ("abc" #f)
+ "abd")
+ ((: "a" ("bcd") "e")
+ ("abd" #f)
+ "ace")
+ ((: "a" ("bcd"))
+ "ac"
+ ("aac" #f))
+ ((: "a" (~ ("bc")) "d")
+ "aed"
+ ("abd" #f))
+ ((: (+ "a") (+ "b") "c")
+ "abc"
+ "aabbc"
+ ("aabbabc" #f))
+ ((* (* "a"))
+ ("-" (0 0)))
+ ((+ (* "a"))
+ ("-" (0 0)))
+ ((? (* "a"))
+ ("-" (0 0)))
+ ((* (or "a" (:)))
+ ("-" (0 0)))
+ ((* (or (* "a") "b"))
+ ("-" (0 0)))
+ ((* (or (+ "a") "b"))
+ "ab")
+ ((+ (or (+ "a") "b"))
+ "ab")
+ ((? (or (+ "a") "b"))
+ ("ba" (0 1))
+ ("ab" (0 1)))
+ ((* (~ ("ab")))
+ "cde")
+ ((: (* ("abc")) "d")
+ "abbbcd")
+ ((: (* ("abc")) "bcd")
+ "abcd")
+ ((or "a" "b" "c" "d" "e")
+ "e")
+ ((: (or "a" "b" "c" "d" "e") "f")
+ "ef")
+ ((: "abc" (* "d") "efg")
+ "abcdefg")
+ ("multiple words of text"
+ ("uh-uh" #f))
+ ("multiple words"
+ ("multiple words, yeah" (0 14)))
+ ((: (-> x nonl nonl nonl nonl)
+ (* nonl)
+ (backref x))
+ ("beriberi" (0 8 (x . "beri")))))))
\ No newline at end of file