--- /dev/null
+#| -*-Scheme-*-
+
+Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
+ 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
+ 2017, 2018, 2019 Massachusetts Institute of Technology
+
+This file is part of MIT/GNU Scheme.
+
+MIT/GNU Scheme is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.
+
+MIT/GNU Scheme is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with MIT/GNU Scheme; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
+USA.
+
+|#
+
+;;;; Recursive regular-expression implementation
+;;; package: (runtime regexp recursive)
+
+;;; The compiler takes a regular sexpression and returns an
+;;; instruction. An instruction is a procedure that accepts a success
+;;; continuation, and returns a "linked instruction". But success
+;;; continuations and linked instructions have the same signature,
+;;; which encourages the use of a combinator language.
+
+(declare (usual-integrations))
+\f
+;;;; Instructions
+
+(define (insn:always-succeed)
+ (lambda (succeed)
+ succeed))
+
+(define (insn:always-fail)
+ (lambda (succeed)
+ succeed
+ (lambda (position groups fail)
+ position groups
+ (fail))))
+
+(define (insn:string-start)
+ (lambda (succeed)
+ (lambda (position groups fail)
+ (if (not (prev-char position))
+ (succeed position groups fail)
+ (fail)))))
+
+(define (insn:string-end)
+ (lambda (succeed)
+ (lambda (position groups fail)
+ (if (not (next-char position))
+ (succeed position groups fail)
+ (fail)))))
+
+(define (insn:line-start)
+ (lambda (succeed)
+ (lambda (position groups fail)
+ (if (let ((char (prev-char position)))
+ (or (not char)
+ (char=? char #\newline)))
+ (succeed position groups fail)
+ (fail)))))
+
+(define (insn:line-end)
+ (lambda (succeed)
+ (lambda (position groups fail)
+ (if (let ((char (next-char position)))
+ (or (not char)
+ (char=? char #\newline)))
+ (succeed position groups fail)
+ (fail)))))
+\f
+(define (insn:char-matching predicate)
+ (lambda (succeed)
+ (lambda (position groups fail)
+ (if (let ((char (next-char position)))
+ (and char
+ (predicate char)))
+ (succeed (next-position position) groups fail)
+ (fail)))))
+
+(define (insn:char char fold-case?)
+ (insn:char-matching
+ ((if fold-case? char-ci=-predicate char=-predicate) char)))
+
+(define (insn:char-set char-set)
+ (insn:char-matching (char-set-predicate char-set)))
+
+(define (insn:inverse-char-set char-set)
+ (insn:char-set (char-set-invert char-set)))
+
+(define (insn:string string fold-case?)
+ (let ((end (string-length string)))
+ (cond ((fix:= end 0)
+ (insn:always-succeed))
+ ((fix:= end 1)
+ (insn:char (string-ref string 0) fold-case?))
+ (else
+ (let ((c= (if fold-case? char-ci=? char=?)))
+ (lambda (succeed)
+ (lambda (position groups fail)
+ (let loop ((i 0) (position position))
+ (if (fix:< i end)
+ (if (let ((char (next-char position)))
+ (and char
+ (c= char (string-ref string i))))
+ (loop (fix:+ i 1) (next-position position))
+ (fail))
+ (succeed position groups fail))))))))))
+
+(define (insn:group key insn)
+ (let ((start
+ (lambda (succeed)
+ (lambda (position groups fail)
+ (succeed position
+ (start-group key position groups)
+ fail))))
+ (end
+ (lambda (succeed)
+ (lambda (position groups fail)
+ (succeed position
+ (end-group key position groups)
+ fail)))))
+ (lambda (succeed)
+ (start (insn (end succeed))))))
+
+(define (insn:group-ref key)
+ (lambda (succeed)
+ (lambda (position groups fail)
+ ((let ((group (find-group key groups)))
+ (if group
+ ((insn:string (group-value group) #f) succeed)
+ ;; This can happen with (* (GROUP ...)), but in other cases it
+ ;; would be an error.
+ succeed))
+ position groups fail))))
+\f
+(define (insn:seq insns)
+ (lambda (succeed)
+ (fold-right (lambda (insn next)
+ (insn next))
+ succeed
+ insns)))
+
+(define (insn:alt insns)
+ (reduce-right (lambda (insn1 insn2)
+ (lambda (succeed)
+ (%failure-chain (insn1 succeed)
+ (insn2 succeed))))
+ (insn:always-fail)
+ insns))
+
+(define (insn:? insn)
+ (lambda (succeed)
+ (%failure-chain (insn succeed) succeed)))
+
+(define (insn:?? insn)
+ (lambda (succeed)
+ (%failure-chain succeed (insn succeed))))
+
+;;; The next two operations must fail when the instruction makes no
+;;; progress in a given iteration. Otherwise patterns like (* (SEQ))
+;;; will loop forever.
+
+(define (insn:* insn)
+ (lambda (succeed)
+ (define (loop position groups fail)
+ ((%failure-chain (insn
+ (lambda (position* groups* fail*)
+ (if (same-positions? position* position)
+ (fail*)
+ (loop position* groups* fail*))))
+ succeed)
+ position groups fail))
+ loop))
+
+(define (insn:*? insn)
+ (lambda (succeed)
+ (define (loop position groups fail)
+ ((%failure-chain succeed
+ (insn
+ (lambda (position* groups* fail*)
+ (if (same-positions? position* position)
+ (fail*)
+ (loop position* groups* fail*)))))
+ position groups fail))
+ loop))
+
+(define (%failure-chain s1 s2)
+ (lambda (position groups fail)
+ (s1 position
+ groups
+ (lambda () (s2 position groups fail)))))
+\f
+(define (insn:** n m insn)
+ (%repeat n m insn
+ (lambda (limit insn)
+ (%hybrid-chain limit
+ (lambda (succeed)
+ (lambda (continue)
+ (%failure-chain (insn continue) succeed)))))
+ insn:*))
+
+(define (insn:**? n m insn)
+ (%repeat n m insn
+ (lambda (limit insn)
+ (%hybrid-chain limit
+ (lambda (succeed)
+ (lambda (continue)
+ (%failure-chain succeed (insn continue))))))
+ insn:*?))
+
+(define (%repeat n m insn repeat-limited repeat-unlimited)
+ (let ((insn1 (%repeat-exactly n insn))
+ (insn2
+ (if m
+ (repeat-limited (- m n) insn)
+ (repeat-unlimited insn))))
+ (lambda (succeed)
+ (insn1 (insn2 succeed)))))
+
+(define (%repeat-exactly n insn)
+ (%hybrid-chain n
+ (lambda (succeed)
+ (declare (ignore succeed))
+ insn)))
+
+(define (%hybrid-chain limit pre-linker)
+ (if (<= limit 8)
+ (%immediate-chain limit pre-linker)
+ (%delayed-chain limit pre-linker)))
+
+(define (%immediate-chain limit pre-linker)
+ (lambda (succeed)
+ (let ((linker (pre-linker succeed)))
+ (let loop ((i 0))
+ (if (< i limit)
+ (linker (loop (+ i 1)))
+ succeed)))))
+
+(define (%delayed-chain limit pre-linker)
+ (lambda (succeed)
+ (let ((linker (pre-linker succeed)))
+ (let loop ((i 0))
+ (if (< i limit)
+ (lambda (position groups fail)
+ ((linker (loop (+ i 1))) position groups fail))
+ succeed)))))
+\f
+;;;; Positions
+
+(define-record-type <position>
+ (make-position marker index next-char prev-char next-pos prev-pos)
+ position?
+ (marker pos-marker)
+ (index pos-index)
+ (next-char next-char)
+ (prev-char prev-char)
+ (next-pos %next-pos)
+ (prev-pos %prev-pos))
+
+(define (next-position position)
+ ((%next-pos position)))
+
+(define (prev-position position)
+ ((%prev-pos position)))
+
+(define (same-positions? p1 p2)
+ (and (eq? (pos-marker p1) (pos-marker p2))
+ (fix:= (pos-index p1) (pos-index p2))))
+
+(define (extract-string start-position end-position)
+ (let ((builder (string-builder)))
+ (do ((position start-position (next-position position)))
+ ((same-positions? position end-position))
+ (builder (next-char position)))
+ (builder)))
+
+(define (make-source-position source)
+ (let ((marker (list 'source-position)))
+ (let loop
+ ((index 0)
+ (next-char (source))
+ (prev-char #f)
+ (prev-pos #f))
+ (define this
+ (make-position marker index next-char prev-char
+ (lambda ()
+ (loop (fix:+ index 1) (source) next-char this))
+ (lambda ()
+ prev-pos)))
+ this)))
+
+(define (make-string-position string start end)
+ (let ((marker (list 'string-position)))
+ (let loop ((index start))
+ (define this
+ (make-position marker index
+ (and (fix:< index end)
+ (string-ref string index))
+ (and (fix:> index start)
+ (string-ref string (fix:- index 1)))
+ (lambda ()
+ (if (fix:< index end)
+ (loop (fix:+ index 1))
+ this))
+ (lambda ()
+ (if (fix:>= index start)
+ (loop (fix:- index 1))
+ this))))
+ this)))
+\f
+;;;; Groups
+
+(define (make-groups)
+
+ (define (state started-groups ended-groups)
+
+ (define (start key position)
+ (if (assv key started-groups)
+ (error "Incorrectly nested group:" key))
+ (state (cons (cons key position) started-groups)
+ ended-groups))
+
+ (define (end key position)
+ (if (not (and (pair? started-groups)
+ (eqv? (caar started-groups) key)))
+ (error "Incorrectly nested group:" key))
+ (state (cdr started-groups)
+ (cons (make-group key
+ (cdar started-groups)
+ position)
+ ended-groups)))
+
+ (define (*find key)
+ (if (assv key started-groups)
+ (error "Can't refer to unfinished group:" key))
+ (find (lambda (g)
+ (eqv? key (group-key g)))
+ ended-groups))
+
+ (define (get-all)
+ (reverse ended-groups))
+
+ (%make-groups start end *find get-all))
+
+ (state '() '()))
+
+(define-record-type <groups>
+ (%make-groups start end find all)
+ groups?
+ (start groups:start)
+ (end groups:end)
+ (find groups:find)
+ (all groups:all))
+
+(define (start-group key position groups)
+ ((groups:start groups) key position))
+
+(define (end-group key position groups)
+ ((groups:end groups) key position))
+
+(define (find-group key groups)
+ ((groups:find groups) key))
+
+(define (all-groups groups)
+ ((groups:all groups)))
+
+(define (make-group key start-position end-position)
+ (%make-group key
+ (pos-index start-position)
+ (pos-index end-position)
+ (extract-string start-position end-position)))
+
+(define-record-type <group>
+ (%make-group key start end value)
+ group?
+ (key group-key)
+ (start group-start)
+ (end group-end)
+ (value group-value))
\ No newline at end of file
--- /dev/null
+#| -*- Mode: Scheme; keyword-style: none -*-
+
+Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
+ 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
+ 2017, 2018, 2019 Massachusetts Institute of Technology
+
+This file is part of MIT/GNU Scheme.
+
+MIT/GNU Scheme is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.
+
+MIT/GNU Scheme is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with MIT/GNU Scheme; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
+USA.
+
+|#
+
+;;;; Rules
+;;; package: (runtime regexp rules)
+
+;;; A simple rule system. Supports list-structured patterns, suitable for
+;;; parsing syntax, as well as completely general rules.
+
+(declare (usual-integrations))
+\f
+(define-record-type <rule>
+ (general-rule key predicate operation)
+ rule?
+ (key rule-key)
+ (predicate rule-predicate)
+ (operation rule-operation))
+
+(define (pattern? object)
+ (if (pair? object)
+ (and (pattern? (car object))
+ (let loop ((object (cdr object)))
+ (if (pair? object)
+ (and (pattern? (car object))
+ (loop (cdr object)))
+ (or (null? object)
+ (unary-procedure? object)))))
+ (or (pattern-constant? object)
+ (unary-procedure? object))))
+(register-predicate! pattern? 'pattern)
+
+(define (pattern-constant? object)
+ (or (interned-symbol? object)
+ (number? object)
+ (char? object)
+ (boolean? object)))
+
+(define (pattern-rule pattern operation #!optional guard-pred)
+ (let ((predicate (pattern->predicate pattern 'pattern-rule))
+ (guard-pred (if (default-object? guard-pred) #f guard-pred)))
+ (receive (wrapper arity)
+ (pattern-calling-convention pattern 'pattern-rule)
+ (guarantee-procedure-of-arity operation arity 'pattern-rule)
+ (if guard-pred
+ (guarantee-procedure-of-arity guard-pred arity 'pattern-rule))
+ (general-rule pattern
+ (if guard-pred
+ (let ((wrapped (wrapper guard-pred)))
+ (lambda (object)
+ (and (predicate object)
+ (wrapped object))))
+ predicate)
+ (wrapper operation)))))
+
+(define (pattern->predicate pattern caller)
+ (cond ((or (pair? pattern) (null? pattern))
+ (list-predicate pattern caller))
+ ((or (interned-symbol? pattern)
+ (fixnum? pattern)
+ (char? pattern)
+ (boolean? pattern))
+ (lambda (object) (eq? pattern object)))
+ ((number? pattern)
+ (lambda (object) (eqv? pattern object)))
+ ((unary-procedure? pattern)
+ pattern)
+ (else
+ (error:not-a pattern? pattern caller))))
+\f
+(define (list-predicate pattern caller)
+ (let ((preds (parse-list-pattern pattern caller)))
+ (lambda (object)
+ (let loop ((preds preds) (object object))
+ (if (pair? preds)
+ (and (pair? object)
+ ((car preds) (car object))
+ (loop (cdr preds) (cdr object)))
+ (preds object))))))
+
+(define (parse-list-pattern pattern caller)
+ (let loop ((pattern pattern))
+ (if (pair? pattern)
+ (cons (pattern->predicate (car pattern) caller)
+ (loop (cdr pattern)))
+ (cond ((null? pattern) null?)
+ ((unary-procedure? pattern) (tail-predicate pattern))
+ (else (error:not-a pattern? pattern caller))))))
+
+(define (tail-predicate pred)
+ (define (predicate object)
+ (if (pair? object)
+ (and (pred (car object))
+ (predicate (cdr object)))
+ (null? object)))
+ predicate)
+
+(define (pattern-calling-convention pattern caller)
+ (cond ((pair? pattern)
+ (if (pattern-constant? (car pattern))
+ (values (lambda (procedure)
+ (lambda (object)
+ (apply procedure (cdr object))))
+ (pattern-arity (cdr pattern)))
+ (values (lambda (procedure)
+ (lambda (object)
+ (apply procedure object)))
+ (pattern-arity pattern))))
+ ((pattern-constant? pattern)
+ (values (lambda (procedure)
+ (lambda (object)
+ (declare (ignore object))
+ (procedure)))
+ (make-procedure-arity 0)))
+ ((unary-procedure? pattern)
+ (values (lambda (procedure)
+ procedure)
+ (make-procedure-arity 1)))
+ (else
+ (error:not-a pattern? pattern caller))))
+
+(define (pattern-arity pattern)
+ (let loop ((pattern pattern) (n 0))
+ (cond ((pair? pattern) (loop (cdr pattern) (+ n 1)))
+ ((null? pattern) (make-procedure-arity n))
+ (else (make-procedure-arity n #f)))))
+\f
+(define-record-type <rules>
+ (%make-rules name adder matcher getter)
+ rules?
+ (name rules-name)
+ (adder rules-adder)
+ (matcher rules-matcher)
+ (getter rules-getter))
+
+(define-print-method rules?
+ (standard-print-method 'rules
+ (lambda (rules)
+ (list (rules-name rules)))))
+
+(define-pp-describer rules?
+ (lambda (rules)
+ (let ((elts ((rules-getter rules))))
+ (map list
+ (iota (length elts))
+ elts))))
+
+(define (make-rules name)
+ (let ((rules '()))
+
+ (define (add! rule)
+ (set! rules
+ (cons rule
+ (remove! (lambda (rule*)
+ (equal? (rule-key rule)
+ (rule-key rule*)))
+ rules)))
+ unspecific)
+
+ (define (match object)
+ (let ((matched
+ (filter (lambda (rule)
+ ((rule-predicate rule) object))
+ rules)))
+ (and (pair? matched)
+ (begin
+ (if (pair? (cdr matched))
+ (error "Multiple rule matches:" matched object))
+ (car matched)))))
+
+ (define (get)
+ (list-copy rules))
+
+ (%make-rules name add! match get)))
+
+(define (rules-rewriter rules)
+ (let ((match (rules-matcher rules)))
+ (define (rewrite object)
+ (let ((rule (match object)))
+ (if rule
+ (rewrite ((rule-operation rule) object))
+ object)))
+ rewrite))
+
+(define (rules-definer rules)
+ (let ((adder (rules-adder rules)))
+ (lambda (pattern operation #!optional predicate)
+ (adder
+ (if (pattern? pattern)
+ (pattern-rule pattern operation predicate)
+ (general-rule pattern predicate operation))))))
+
+(add-boot-init! (lambda () (run-deferred-boot-actions 'regexp-rules)))
\ No newline at end of file
|#
;;;; Regular s-expressions
-;;; package: (runtime regular-sexpression)
-
-;;; The compiler takes a regular sexpression and returns an
-;;; instruction. An instruction is a procedure that accepts a success
-;;; continuation, and returns a "linked instruction". But success
-;;; continuations and linked instructions have the same signature,
-;;; which encourages the use of a combinator language.
+;;; package: (runtime regexp regsexp)
(declare (usual-integrations))
\f
+(define (regsexp? object)
+ (and (find-rule object) #t))
+(register-predicate! regsexp? 'regular-sexpression)
+
(define (compile-regsexp regsexp)
(%link-insn
- (bind-condition-handler (list condition-type:error)
- (lambda (condition)
- (signal-compile-error regsexp condition))
- (lambda ()
- (%compile-regsexp regsexp)))))
+ (parameterize ((%input-pattern regsexp))
+ (%compile-regsexp regsexp))))
+
+(define %input-pattern
+ (make-unsettable-parameter #f))
(define (%compile-regsexp regsexp)
- (cond ((unicode-char? regsexp)
- (insn:char regsexp #f))
- ((string? regsexp)
- (insn:string regsexp #f))
- ((and (pair? regsexp)
- (symbol? (car regsexp))
- (find (lambda (rule)
- (and (eq? (caar rule) (car regsexp))
- (syntax-match? (cdar rule) (cdr regsexp))))
- %compile-regsexp-rules))
- => (lambda (rule)
- (apply (cdr rule) (cdr regsexp))))
- (else
- (error "Ill-formed regular s-expression:" regsexp))))
+ (let ((rule (find-rule regsexp)))
+ (if (not rule)
+ (compile-error (%input-pattern) regsexp))
+ ((rule-operation rule) regsexp)))
(define (%link-insn insn)
(make-compiled-regsexp
(insn
(lambda (position groups fail)
- fail
- (cons (get-index position)
- ((groups 'get-all)))))))
+ (declare (ignore fail))
+ (cons position (all-groups groups))))))
(define-record-type <compiled-regsexp>
(make-compiled-regsexp impl)
((compiled-regsexp-impl crsexp)
start-position (make-groups) (lambda () #f))))
(and result
- (cons (get-index start-position)
- result))))
-
-(define (group-key? object)
+ (cons* (pos-index start-position)
+ (pos-index (car result))
+ (map (lambda (group)
+ (cons (group-key group)
+ (group-value group)))
+ (cdr result))))))
+
+(define (regsexp-group-key? object)
(or (fix:fixnum? object)
(unicode-char? object)
(symbol? object)))
(define condition-type:compile-regsexp)
-(define signal-compile-error)
+(define compile-error)
(define (initialize-conditions!)
(set! condition-type:compile-regsexp
(make-condition-type 'compile-regsexp condition-type:error
- '(pattern cause)
+ '(pattern element)
(lambda (condition port)
- (write (access-condition condition 'pattern) port)
- (write-string ": " port)
- (write-condition-report (access-condition condition 'cause) port))))
- (set! signal-compile-error
+ (write-string "Ill-formed regular s-expression: " port)
+ (write (access-condition condition 'element) port)
+ (write-string " from pattern: " port)
+ (write (access-condition condition 'pattern) port))))
+ (set! compile-error
(condition-signaller condition-type:compile-regsexp
- '(pattern cause)
+ '(pattern element)
standard-error-handler))
unspecific)
\f
;;;; Compiler rules
-(define (define-rule pattern compiler)
- (add-boot-init!
- (lambda ()
- (if (not (and (pair? pattern)
- (symbol? (car pattern))))
- (error:bad-range-argument pattern 'define-rule))
- (let ((p
- (find (lambda (p)
- (eq? (car p) (car pattern)))
- %compile-regsexp-rules)))
- (if p
- (set-cdr! p compiler)
- (begin
- (set! %compile-regsexp-rules
- (cons (cons pattern compiler)
- %compile-regsexp-rules))
- unspecific))))))
-
-(define %compile-regsexp-rules '())
+(define regsexp-rules)
+(defer-boot-action 'regexp-rules
+ (lambda ()
+ (set! regsexp-rules (make-rules 'regsexp))
+ unspecific))
+
+(define-deferred-procedure find-rule 'regexp-rules
+ (rules-matcher regsexp-rules))
+
+(define-deferred-procedure define-rule 'regexp-rules
+ (rules-definer regsexp-rules))
+
+(define (any-char? object)
+ (unicode-char? object))
+
+(define (min-arity? object)
+ (exact-nonnegative-integer? object))
+
+(define (max-arity? object)
+ (or (not object)
+ (exact-nonnegative-integer? object)))
+
+(define-rule "char"
+ (lambda (char) (insn:char char #f))
+ any-char?)
+
+(define-rule "string"
+ (lambda (string) (insn:string string #f))
+ string?)
(define-rule '(any-char)
(lambda ()
- (insn:char-matching (negate (char=-predicate #\newline)))))
-
-(define-rule `(char-ci datum)
- (lambda (char)
- (guarantee unicode-char? char)
- (insn:char char #t)))
-
-(define-rule `(string-ci datum)
- (lambda (string)
- (guarantee string? string)
- (insn:string string #t)))
-
-(define-rule '(char-matching expression)
- (lambda (predicate)
- (insn:char-matching
- (cond ((unary-procedure? predicate)
- predicate)
- ((and (syntax-match? '('not expression) predicate)
- (unary-procedure? (cadr predicate)))
- (cadr predicate))
- (else
- (error:not-a unary-procedure? predicate))))))
-
-(define-rule '(char-in * datum)
- (lambda items
- (insn:char-set (char-set* items))))
-
-(define-rule '(char-not-in * datum)
- (lambda items
- (insn:inverse-char-set (char-set* items))))
-
-(define-rule '(legacy-char-syntax datum)
- (lambda (code)
- (insn:char-matching
- (if (or (char=? code #\-) (char=? code #\space))
- char-whitespace?
- (syntax-code-predicate code)))))
+ (insn:char-matching (complement (char=-predicate #\newline)))))
-(define-rule '(inverse-legacy-char-syntax datum)
- (lambda (code)
- (insn:char-matching
- (negate
- (if (or (char=? code #\-) (char=? code #\space))
- char-whitespace?
- (syntax-code-predicate code))))))
+(define-rule `(char-ci ,any-char?)
+ (lambda (char) (insn:char char #t)))
+
+(define-rule `(string-ci ,string?)
+ (lambda (string) (insn:string string #t)))
+
+(define-rule `(char-matching ,unary-procedure?)
+ (lambda (predicate) (insn:char-matching predicate)))
+
+(define-rule `(char-matching (not ,unary-procedure?))
+ (lambda (expr) (insn:char-matching (complement (cadr expr)))))
-(define (negate predicate)
- (lambda (object)
- (not (predicate object))))
+(define-rule `(char-in . ,cpl-element?)
+ (lambda items (insn:char-set (char-set* items))))
+(define-rule `(char-not-in . ,cpl-element?)
+ (lambda items (insn:inverse-char-set (char-set* items))))
+
+(define (syntax-code? object)
+ (or (eqv? object #\-)
+ (eqv? object #\space)
+ (char-syntax-code? object)))
+
+(define-rule `(legacy-char-syntax ,syntax-code?)
+ (lambda (code)
+ (insn:char-matching (syntax-code-predicate code))))
+
+(define-rule `(inverse-legacy-char-syntax ,syntax-code?)
+ (lambda (code)
+ (insn:char-matching (complement (syntax-code-predicate code)))))
+\f
(define-rule '(line-start) (lambda () (insn:line-start)))
(define-rule '(line-end) (lambda () (insn:line-end)))
(define-rule '(string-start) (lambda () (insn:string-start)))
(define-rule '(string-end) (lambda () (insn:string-end)))
-\f
-(define-rule '(? form) ;greedy 0 or 1
+
+(define-rule `(? ,regsexp?) ;greedy 0 or 1
(lambda (regsexp)
(insn:? (%compile-regsexp regsexp))))
-(define-rule '(* form) ;greedy 0 or more
+(define-rule `(* ,regsexp?) ;greedy 0 or more
(lambda (regsexp)
(insn:* (%compile-regsexp regsexp))))
-(define-rule '(+ form) ;greedy 1 or more
+(define-rule `(+ ,regsexp?) ;greedy 1 or more
(lambda (regsexp)
- (%compile-regsexp `(** 1 #f ,regsexp))))
+ (insn:** 1 #f (%compile-regsexp regsexp))))
-(define-rule '(?? form) ;shy 0 or 1
+(define-rule `(?? ,regsexp?) ;shy 0 or 1
(lambda (regsexp)
(insn:?? (%compile-regsexp regsexp))))
-(define-rule '(*? form) ;shy 0 or more
+(define-rule `(*? ,regsexp?) ;shy 0 or more
(lambda (regsexp)
(insn:*? (%compile-regsexp regsexp))))
-(define-rule '(+? form) ;shy 1 or more
+(define-rule `(+? ,regsexp?) ;shy 1 or more
(lambda (regsexp)
- (%compile-regsexp `(**? 1 #f ,regsexp))))
+ (insn:**? 1 #f (%compile-regsexp regsexp))))
-(define-rule '(** datum form) ;greedy exactly N
+(define-rule `(** ,min-arity? ,regsexp?) ;greedy exactly N
(lambda (n regsexp)
- (guarantee exact-nonnegative-integer? n)
(insn:** n n (%compile-regsexp regsexp))))
-(define-rule '(**? datum form) ;shy exactly N
+(define-rule `(**? ,min-arity? ,regsexp?) ;shy exactly N
(lambda (n regsexp)
- (guarantee exact-nonnegative-integer? n)
(insn:**? n n (%compile-regsexp regsexp))))
-(define-rule '(** datum datum form) ;greedy between N and M
+(define-rule `(** ,min-arity? ,max-arity? ,regsexp?) ;greedy between N and M
+ (lambda (n m regsexp) (insn:** n m (%compile-regsexp regsexp)))
(lambda (n m regsexp)
- (check-repeat-2-args n m)
- (insn:** n m (%compile-regsexp regsexp))))
+ (declare (ignore regsexp))
+ (or (not m)
+ (<= n m))))
-(define-rule '(**? datum datum form) ;shy begin N and M
+(define-rule `(**? ,min-arity? ,max-arity? ,regsexp?) ;shy between N and M
+ (lambda (n m regsexp) (insn:**? n m (%compile-regsexp regsexp)))
(lambda (n m regsexp)
- (check-repeat-2-args n m)
- (insn:**? n m (%compile-regsexp regsexp))))
-
-(define (check-repeat-2-args n m)
- (guarantee exact-nonnegative-integer? n)
- (if m
- (begin
- (guarantee exact-nonnegative-integer? m)
- (if (not (<= n m))
- (error "Repeat lower limit greater than upper limit:" n m)))))
-
-(define-rule '(alt * form)
+ (declare (ignore regsexp))
+ (or (not m)
+ (<= n m))))
+
+(define-rule `(alt . ,regsexp?)
(lambda regsexps
- (insn:alt (map %compile-regsexp regsexps))))
+ (insn:alt (map-in-order %compile-regsexp regsexps))))
-(define-rule '(seq * form)
+(define-rule `(seq . ,regsexp?)
(lambda regsexps
- (insn:seq (map %compile-regsexp regsexps))))
+ (insn:seq (map-in-order %compile-regsexp regsexps))))
-(define-rule `(group datum form)
+(define-rule `(group ,regsexp-group-key? ,regsexp?)
(lambda (key regsexp)
- (guarantee group-key? key)
(insn:group key (%compile-regsexp regsexp))))
-(define-rule `(group-ref datum)
+(define-rule `(group-ref ,regsexp-group-key?)
(lambda (key)
- (guarantee group-key? key)
(insn:group-ref key)))
\f
-;;;; Instructions
-
-(define (insn:always-succeed)
- (lambda (succeed)
- succeed))
-
-(define (insn:always-fail)
- (lambda (succeed)
- succeed
- (lambda (position groups fail)
- position groups
- (fail))))
-
-(define (insn:string-start)
- (lambda (succeed)
- (lambda (position groups fail)
- (if (not (prev-char position))
- (succeed position groups fail)
- (fail)))))
-
-(define (insn:string-end)
- (lambda (succeed)
- (lambda (position groups fail)
- (if (not (next-char position))
- (succeed position groups fail)
- (fail)))))
-
-(define (insn:line-start)
- (lambda (succeed)
- (lambda (position groups fail)
- (if (let ((char (prev-char position)))
- (or (not char)
- (char=? char #\newline)))
- (succeed position groups fail)
- (fail)))))
-
-(define (insn:line-end)
- (lambda (succeed)
- (lambda (position groups fail)
- (if (let ((char (next-char position)))
- (or (not char)
- (char=? char #\newline)))
- (succeed position groups fail)
- (fail)))))
-\f
-(define (insn:char-matching predicate)
- (lambda (succeed)
- (lambda (position groups fail)
- (if (let ((char (next-char position)))
- (and char
- (predicate char)))
- (succeed (next-position position) groups fail)
- (fail)))))
-
-(define (insn:char char fold-case?)
- (insn:char-matching
- ((if fold-case? char-ci=-predicate char=-predicate) char)))
-
-(define (insn:char-set char-set)
- (insn:char-matching (char-set-predicate char-set)))
-
-(define (insn:inverse-char-set char-set)
- (insn:char-matching (negate (char-set-predicate char-set))))
-
-(define (insn:string string fold-case?)
- (let ((end (string-length string)))
- (cond ((fix:= end 0)
- (insn:always-succeed))
- ((fix:= end 1)
- (insn:char (string-ref string 0) fold-case?))
- (else
- (let ((c= (if fold-case? char-ci=? char=?)))
- (lambda (succeed)
- (lambda (position groups fail)
- (let loop ((i 0) (position position))
- (if (fix:< i end)
- (if (let ((char (next-char position)))
- (and char
- (c= char (string-ref string i))))
- (loop (fix:+ i 1) (next-position position))
- (fail))
- (succeed position groups fail))))))))))
-
-(define (insn:group key insn)
- (let ((start
- (lambda (succeed)
- (lambda (position groups fail)
- (succeed position
- ((groups 'start) key position)
- fail))))
- (end
- (lambda (succeed)
- (lambda (position groups fail)
- (succeed position
- ((groups 'end) key position)
- fail)))))
- (lambda (succeed)
- (start (insn (end succeed))))))
-
-(define (insn:group-ref key)
- (lambda (succeed)
- (lambda (position groups fail)
- ((let ((value ((groups 'get-value) key)))
- (if value
- ((insn:string value #f) succeed)
- ;; This can happen with (* (GROUP ...)), but in other cases it
- ;; would be an error.
- succeed))
- position groups fail))))
-\f
-(define (insn:seq insns)
- (lambda (succeed)
- (fold-right (lambda (insn next)
- (insn next))
- succeed
- insns)))
-
-(define (insn:alt insns)
- (reduce-right (lambda (insn1 insn2)
- (lambda (succeed)
- (%failure-chain (insn1 succeed)
- (insn2 succeed))))
- (insn:always-fail)
- insns))
-
-(define (insn:? insn)
- (lambda (succeed)
- (%failure-chain (insn succeed) succeed)))
-
-(define (insn:?? insn)
- (lambda (succeed)
- (%failure-chain succeed (insn succeed))))
-
-;;; The next two operations must fail when the instruction makes no
-;;; progress in a given iteration. Otherwise patterns like (* (SEQ))
-;;; will loop forever.
-
-(define (insn:* insn)
- (lambda (succeed)
- (define (loop position groups fail)
- ((%failure-chain (insn
- (lambda (position* groups* fail*)
- (if (same-positions? position* position)
- (fail*)
- (loop position* groups* fail*))))
- succeed)
- position groups fail))
- loop))
-
-(define (insn:*? insn)
- (lambda (succeed)
- (define (loop position groups fail)
- ((%failure-chain succeed
- (insn
- (lambda (position* groups* fail*)
- (if (same-positions? position* position)
- (fail*)
- (loop position* groups* fail*)))))
- position groups fail))
- loop))
-
-(define (%failure-chain s1 s2)
- (lambda (position groups fail)
- (s1 position
- groups
- (lambda () (s2 position groups fail)))))
-\f
-(define (insn:** n m insn)
- (%repeat n m insn
- (lambda (limit insn)
- (%hybrid-chain limit
- (lambda (succeed)
- (lambda (continue)
- (%failure-chain (insn continue) succeed)))))
- insn:*))
-
-(define (insn:**? n m insn)
- (%repeat n m insn
- (lambda (limit insn)
- (%hybrid-chain limit
- (lambda (succeed)
- (lambda (continue)
- (%failure-chain succeed (insn continue))))))
- insn:*?))
-
-(define (%repeat n m insn repeat-limited repeat-unlimited)
- (let ((insn1 (%repeat-exactly n insn))
- (insn2
- (if m
- (repeat-limited (- m n) insn)
- (repeat-unlimited insn))))
- (lambda (succeed)
- (insn1 (insn2 succeed)))))
-
-(define (%repeat-exactly n insn)
- (%hybrid-chain n
- (lambda (succeed)
- succeed
- insn)))
-
-(define (%hybrid-chain limit pre-linker)
- (if (<= limit 8)
- (%immediate-chain limit pre-linker)
- (%delayed-chain limit pre-linker)))
-
-(define (%immediate-chain limit pre-linker)
- (lambda (succeed)
- (let ((linker (pre-linker succeed)))
- (let loop ((i 0))
- (if (< i limit)
- (linker (loop (+ i 1)))
- succeed)))))
-
-(define (%delayed-chain limit pre-linker)
- (lambda (succeed)
- (let ((linker (pre-linker succeed)))
- (let loop ((i 0))
- (if (< i limit)
- (lambda (position groups fail)
- ((linker (loop (+ i 1))) position groups fail))
- succeed)))))
-\f
-;;;; Positions
-
-(define (get-index position)
- ((position 'get-index)))
-
-(define (next-char position)
- ((position 'next-char)))
-
-(define (next-position position)
- ((position 'next-position)))
-
-(define (prev-char position)
- ((position 'prev-char)))
-
-(define (prev-position position)
- ((position 'prev-position)))
-
-(define (same-positions? p1 p2)
- (and (eq? ((p1 'get-marker)) ((p2 'get-marker)))
- (fix:= ((p1 'get-index)) ((p2 'get-index)))))
-
-(define (make-source-position source)
- (let ((marker (list 'source-position)))
-
- (define (at-index index next-char prev-char prev-position)
-
- (define (next-position)
- (at-index (fix:+ index 1) (source) next-char this))
-
- (define (this operator)
- (case operator
- ((get-marker) (lambda () marker))
- ((get-index) (lambda () index))
- ((next-char) (lambda () next-char))
- ((next-position) next-position)
- ((prev-char) (lambda () prev-char))
- ((prev-position) (lambda () prev-position))
- (else (error "Unknown operator:" operator))))
-
- this)
-
- (at-index 0 (source) #f #f)))
-
-(define (make-string-position string start end)
- (let ((marker (list 'string-position)))
-
- (define (at-index index)
-
- (define (next-char)
- (and (fix:< index end)
- (string-ref string index)))
-
- (define (next-position)
- (at-index (fix:+ index 1)))
-
- (define (prev-char)
- (and (fix:> index start)
- (string-ref string (fix:- index 1))))
-
- (define (prev-position)
- (at-index (fix:- index 1)))
-
- (lambda (operator)
- (case operator
- ((get-marker) (lambda () marker))
- ((get-index) (lambda () index))
- ((next-char) next-char)
- ((next-position) next-position)
- ((prev-char) prev-char)
- ((prev-position) prev-position)
- (else (error "Unknown operator:" operator)))))
-
- (at-index start)))
-\f
-;;;; Groups
-
-(define (make-groups)
-
- (define (state started-groups ended-groups)
-
- (define (start key position)
- (if (assv key started-groups)
- (error "Incorrectly nested group:" key))
- (state (cons (cons key position) started-groups)
- ended-groups))
-
- (define (end key position)
- (if (not (and (pair? started-groups)
- (eqv? (caar started-groups) key)))
- (error "Incorrectly nested group:" key))
- (state (cdr started-groups)
- (cons (finish-group key
- (cdar started-groups)
- position)
- ended-groups)))
-
- (define (finish-group key start-position end-position)
- (cons key
- (let loop ((position end-position) (chars '()))
- (if (same-positions? position start-position)
- (list->string chars)
- (let ((char (prev-char position)))
- (loop (prev-position position)
- (cons char chars)))))))
-
- (define (get-value key)
- (if (assv key started-groups)
- (error "Can't refer to unfinished group:" key))
- (let ((p (assv key ended-groups)))
- (and p
- (cdr p))))
-
- (lambda (operator)
- (case operator
- ((start) start)
- ((end) end)
- ((get-value) get-value)
- ((get-all) (lambda () (reverse ended-groups)))
- (else (error "Unknown operator:" operator)))))
-
- (state '() '()))
-\f
-;;;; Match and search
+;;;; Match, search, and replace
(define (regsexp-match-string crsexp string #!optional start end)
- (let* ((caller 'regsexp-match-string)
- (end (fix:end-index end (string-length string) caller))
- (start (fix:start-index start end caller)))
+ (let ((caller 'regsexp-match-string))
(guarantee nfc-string? string caller)
- (top-level-match crsexp (make-string-position string start end))))
+ (let* ((end (fix:end-index end (string-length string) caller))
+ (start (fix:start-index start end caller)))
+ (top-level-match crsexp (make-string-position string start end)))))
(define (regsexp-search-string-forward crsexp string #!optional start end)
- (let* ((caller 'regsexp-search-string-forward)
- (end (fix:end-index end (string-length string) caller))
- (start (fix:start-index start end caller)))
+ (let ((caller 'regsexp-search-string-forward))
(guarantee nfc-string? string caller)
- (let loop ((position (make-string-position string start end)))
- (or (top-level-match crsexp position)
- (and (next-char position)
- (loop (next-position position)))))))
+ (let* ((end (fix:end-index end (string-length string) caller))
+ (start (fix:start-index start end caller)))
+ (let loop ((position (make-string-position string start end)))
+ (or (top-level-match crsexp position)
+ (and (next-char position)
+ (loop (next-position position))))))))
(define (regsexp-match-input-port crsexp port)
+ (guarantee textual-input-port? port 'regsexp-match-input-port)
(top-level-match crsexp
(make-source-position
(lambda ()
(if (eof-object? char)
#f
char))))))
+
+(define (regsexp-match? object)
+ (and (pair? object)
+ (exact-nonnegative-integer? (car object))
+ (pair? (cdr object))
+ (exact-nonnegative-integer? (cadr object))
+ (<= (car object) (cadr object))
+ (list? (cddr object))
+ (every (lambda (elt)
+ (and (pair? elt)
+ (regsexp-group-key? (car elt))
+ (string? (cdr elt))))
+ (cddr object))))
+(register-predicate! regsexp-match? 'regsexp-match)
+
+(define (match-value key match caller)
+ (let ((p (assv key (cddr match))))
+ (if (not p)
+ (error:bad-range-argument key caller))
+ (cdr p)))
+
+(define (regsexp-replacement? object)
+ (or (string? object)
+ (regsexp-group-key? object)
+ (and (list? object)
+ (every regsexp-replacement? object))))
+(register-predicate! regsexp-replacement? 'regsexp-replacement)
+
+(define (regsexp-replacer replacement)
+ (let ((replacer (%regsexp-replacer replacement 'regsexp-replacer)))
+ (lambda (match)
+ (guarantee regsexp-match? match 'regsexp-replacer)
+ (let ((builder (string-builder)))
+ (replacer builder match)
+ (builder)))))
+
+(define (%regsexp-replacer replacement caller)
+ (let loop ((r replacement))
+ (cond ((string? r)
+ (lambda (builder match)
+ (declare (ignore match))
+ (builder r)))
+ ((regsexp-group-key? r)
+ (lambda (builder match)
+ (builder (match-value r match caller))))
+ ((list? r)
+ (let ((elts (map loop r)))
+ (lambda (builder match)
+ (for-each (lambda (elt) (elt builder match)) elts))))
+ (else
+ (error:not-a regsexp-replacement? r caller)))))
\f
;;;; Convert regexp pattern to regsexp