(fix:<= (fix:and #xf object) 12)))
(error:bad-range-argument object procedure)))
+(define (char-syntax-code? object)
+ (and (char? object)
+ (let ((n (vector-length char-syntax-codes)))
+ (let loop ((i 0))
+ (and (fix:< i n)
+ (or (char=? (string-ref (vector-ref char-syntax-codes i) 0)
+ object)
+ (loop (fix:+ i 1))))))))
+
+(define (char->syntax-code table char)
+ (string-ref (vector-ref char-syntax-codes
+ (fix:and #xf (get-char-syntax table char)))
+ 0))
+
(define char-syntax-codes
'#(" " "." "w" "_" "(" ")" "'" "\"" "$" "\\" "/" "<" ">"))
-
+\f
(define (substring-find-next-char-of-syntax string start end table code)
(guarantee 8-bit-string? string 'substring-find-next-char-of-syntax)
(let ((index
- (string-find-first-index (syntax-code-predicate table code)
+ (string-find-first-index (syntax-code-predicate code table)
(string-slice string start end))))
(and index
(fix:+ start index))))
(guarantee 8-bit-string? string 'substring-find-next-char-not-of-syntax)
(let ((index
(string-find-first-index (let ((pred
- (syntax-code-predicate table code)))
+ (syntax-code-predicate code table)))
(lambda (char)
(not (pred char))))
(string-slice string start end))))
(and index
(fix:+ start index))))
-(define (syntax-code-predicate table code)
- (lambda (char)
- (char=? code (char->syntax-code table char))))
-
-(define (char->syntax-code table char)
- (string-ref (vector-ref char-syntax-codes
- (fix:and #xf (get-char-syntax table char)))
- 0))
\ No newline at end of file
+(define (syntax-code-predicate code #!optional table)
+ (guarantee char-syntax-code? code 'syntax-code-predicate)
+ (let ((entries
+ (char-syntax-table/entries
+ (if (default-object? table)
+ standard-char-syntax-table
+ (begin
+ (guarantee char-syntax-table? table 'syntax-code-predicate)
+ table)))))
+ (lambda (char)
+ (let ((cp (char->integer char)))
+ (and (fix:< cp #x100)
+ (char=? (string-ref (vector-ref char-syntax-codes
+ (fix:and #x0F
+ (vector-ref entries cp)))
+ 0)
+ code))))))
\ No newline at end of file
(define-rule '(ANY-CHAR)
(lambda ()
- (%compile-regsexp '(INVERSE-CHAR-SET "\n"))))
+ (insn:test-char (negate (char=-predicate #\newline)))))
+
+(define-rule '(test-char datum)
+ (lambda (predicate)
+ (insn:test-char
+ (if (and (pair? predicate)
+ (eq? (car predicate) 'not)
+ (pair? (cdr predicate))
+ (null? (cddr predicate)))
+ (negate (cadr predicate))
+ predicate))))
+
+(define (negate predicate)
+ (lambda (object)
+ (not (predicate object))))
(define-rule '(+ FORM)
(lambda (regsexp)
(lambda items
(insn:inverse-char-set (char-set* items))))
+(define-rule '(char-syntax datum)
+ (lambda (code)
+ (insn:test-char
+ (if (or (char=? code #\-)
+ (char=? code #\space))
+ char-whitespace?
+ (syntax-code-predicate code)))))
+
+(define-rule '(inverse-char-syntax datum)
+ (lambda (code)
+ (insn:test-char
+ (negate
+ (if (or (char=? code #\-)
+ (char=? code #\space))
+ char-whitespace?
+ (syntax-code-predicate code))))))
+
(define-rule '(LINE-START) (lambda () (insn:line-start)))
(define-rule '(LINE-END) (lambda () (insn:line-end)))
(define-rule '(STRING-START) (lambda () (insn:string-start)))
(succeed position groups fail)
(fail)))))
\f
-(define (insn:char char)
+(define (insn:test-char predicate)
(lambda (succeed)
(lambda (position groups fail)
- (if (eqv? (next-char position) char)
+ (if (let ((char (next-char position)))
+ (and char
+ (predicate char)))
(succeed (next-position position) groups fail)
(fail)))))
+(define (insn:char char)
+ (insn:test-char (char=-predicate char)))
+
+(define (insn:char-set char-set)
+ (insn:test-char (char-set-predicate char-set)))
+
+(define (insn:inverse-char-set char-set)
+ (insn:test-char (negate (char-set-predicate char-set))))
+
(define (insn:chars chars)
(lambda (succeed)
(lambda (position groups fail)
(loop (fix:+ i 1) (next-position position))
(fail)))
(succeed position groups fail)))))))))
-
-(define (insn:char-set char-set)
- (lambda (succeed)
- (lambda (position groups fail)
- (if (let ((char (next-char position)))
- (and char
- (char-in-set? char char-set)))
- (succeed (next-position position) groups fail)
- (fail)))))
-
-(define (insn:inverse-char-set char-set)
- (lambda (succeed)
- (lambda (position groups fail)
- (if (let ((char (next-char position)))
- (and char
- (not (char-in-set? char char-set))))
- (succeed (next-position position) groups fail)
- (fail)))))
\f
(define (insn:group key insn)
(insn:seq (list (%insn:start-group key)
(lambda (p1 p2)
(declare (no-type-checks))
(and (eq? (cdr p1) (cdr p2))
- (fix:= (car p1) (car p2)))))
\ No newline at end of file
+ (fix:= (car p1) (car p2)))))
+\f
+(define (re-pattern->regsexp pattern)
+ (let ((end (string-length pattern)))
+ (let ((index 0)
+ (this-alt '())
+ (prev-alts '())
+ (group-number 0)
+ (pending-groups '()))
+
+ (define (have-next?)
+ (fix:< index end))
+
+ (define (get-next)
+ (let ((char (string-ref pattern index)))
+ (set! index (fix:+ index 1))
+ char))
+
+ (define (next-is? char)
+ (and (char=? (string-ref pattern index) char)
+ (begin
+ (set! index (fix:+ index 1))
+ #t)))
+
+ (define (get-expr)
+ (let ((alt (get-alt)))
+ (if (pair? prev-alts)
+ `(alt ,@(reverse (cons alt prev-alts)))
+ alt)))
+
+ (define (get-alt)
+ (let ((exprs (optimize-alt (reverse this-alt) #f)))
+ (if (= (length exprs) 1)
+ (car exprs)
+ `(seq ,@exprs))))
+
+ (define (optimize-alt exprs builder)
+ (if (pair? exprs)
+ (if (char? (car exprs))
+ (let ((builder (or builder (string-builder))))
+ (builder (car exprs))
+ (optimize-alt (cdr exprs) builder))
+ (if builder
+ (cons (builder)
+ (cons (car exprs)
+ (optimize-alt (cdr exprs) #f)))
+ (cons (car exprs)
+ (optimize-alt (cdr exprs) #f))))
+ (if builder
+ (list (builder))
+ '())))
+
+ (define (dispatch)
+ (if (have-next?)
+ (let ((char (get-next)))
+ (case char
+ ((#\\) (dispatch-backslash))
+ ((#\$) (output-expr '(line-end)))
+ ((#\^) (output-expr '(line-start)))
+ ((#\.) (output-expr '(any-char)))
+ ((#\[) (parse-char-set))
+ ((#\*) (replace-last-expr (lambda (expr) `(* ,expr))))
+ ((#\+) (replace-last-expr (lambda (expr) `(+ ,expr))))
+ ((#\?) (replace-last-expr (lambda (expr) `(? ,expr))))
+ (else (output-expr char))))
+ (get-expr)))
+
+ (define (dispatch-backslash)
+ (let ((char (get-next)))
+ (case char
+ ((#\<) (output-expr '(word-start)))
+ ((#\>) (output-expr '(word-end)))
+ ((#\b) (output-expr '(word-bound)))
+ ((#\B) (output-expr '(not-word-bound)))
+ ((#\`) (output-expr '(string-start)))
+ ((#\') (output-expr '(string-end)))
+ ((#\w) (output-expr '(char-syntax #\w)))
+ ((#\W) (output-expr '(inverse-char-syntax #\w)))
+ ((#\s) (output-expr `(char-syntax ,(get-next))))
+ ((#\S) (output-expr `(inverse-char-syntax ,(get-next))))
+ ((#\() (start-group))
+ ((#\)) (end-group))
+ ((#\|) (push-alt))
+ (else (error "Unsupported regexp:" (string #\\ char))))))
+
+ (define (output-expr expr)
+ (set! this-alt (cons expr this-alt))
+ (dispatch))
+
+ (define (replace-last-expr transform)
+ (set-car! this-alt (transform (car this-alt)))
+ (dispatch))
+
+ (define (start-group)
+ (set! group-number (fix:+ group-number 1))
+ (set! pending-groups
+ (cons (vector group-number this-alt prev-alts)
+ pending-groups))
+ (set! this-alt '())
+ (set! prev-alts '())
+ (dispatch))
+
+ (define (end-group)
+ (let ((expr `(group ,(vector-ref (car pending-groups) 0) ,(get-expr))))
+ (set! this-alt (vector-ref (car pending-groups) 1))
+ (set! prev-alts (vector-ref (car pending-groups) 2))
+ (set! pending-groups (cdr pending-groups))
+ (output-expr expr)))
+
+ (define (push-alt)
+ (set! prev-alts (cons (get-alt) prev-alts))
+ (set! this-alt '())
+ (dispatch))
+
+ (define (parse-char-set)
+ (let loop
+ ((chars
+ (append (if (next-is? #\^)
+ (list #\^)
+ '())
+ (if (next-is? #\])
+ (list #\])
+ '()))))
+ (let ((char (get-next)))
+ (if (char=? char #\])
+ (output-expr
+ (receive (ranges invert?)
+ (re-char-pattern->code-points
+ (list->string (reverse chars)))
+ (cons (if invert? 'inverse-char-set 'char-set)
+ (normalize-ranges ranges))))
+ (loop (cons char chars))))))
+
+ (dispatch))))
\ No newline at end of file
((seq (group x (seq (any-char) (any-char) (any-char) (any-char)))
(* (any-char))
(group-ref x))
- ("beriberi" 8 (x 0 4))))))
\ No newline at end of file
+ ("beriberi" 8 (x 0 4))))))
+\f
+(define-test 're-pattern->regsexp
+ (map (lambda (entry)
+ (lambda ()
+ (assert-equal (re-pattern->regsexp (car entry))
+ (cadr entry))))
+ '(("[\r\n\t ]*(This file must be converted with BinHex.*[\r\n][\r\n\t ]*:"
+ (seq (* (char-set (9 . 11) 13 32))
+ "(This file must be converted with BinHex"
+ (* (any-char))
+ (char-set 10 13)
+ (* (char-set (9 . 11) 13 32))
+ ":"))
+
+ ("^begin +[0-7]+ +.+$"
+ (seq (line-start)
+ "begin"
+ (+ #\space)
+ (+ (char-set (48 . 56)))
+ (+ #\space)
+ (+ (any-char))
+ (line-end)))
+
+ ("\\`8859-[0-9]+\\'"
+ (seq (string-start) "8859-" (+ (char-set (48 . 58))) (string-end)))
+
+ ("\\`0x\\([0-9A-Fa-f][0-9A-Fa-f]\\)\t0x\\([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]\\)\t"
+ (seq (string-start)
+ "0x"
+ (group 1
+ (seq (char-set (48 . 58) (65 . 71) (97 . 103))
+ (char-set (48 . 58) (65 . 71) (97 . 103))))
+ "\t0x"
+ (group
+ 2
+ (seq (char-set (48 . 58) (65 . 71) (97 . 103))
+ (char-set (48 . 58) (65 . 71) (97 . 103))
+ (char-set (48 . 58) (65 . 71) (97 . 103))
+ (char-set (48 . 58) (65 . 71) (97 . 103))))
+ "\t"))
+
+ ("\\`\\s *\\(error:\\)?\\s *\\(.*\\)\\s *\\'"
+ (seq (string-start)
+ (* (char-syntax #\space))
+ (? (group 1 "error:"))
+ (* (char-syntax #\space))
+ (group 2 (* (any-char)))
+ (* (char-syntax #\space))
+ (string-end))))))
\ No newline at end of file