From 30217212526a4456a3b603bc8353bbf9471d79b6 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sun, 21 May 2017 22:31:50 -0700 Subject: [PATCH] Move re-compile-char-set from rgxcmp to chrset. This is a dependency of regsexp, which didn't work unless the regular-expression option was loaded. --- src/runtime/chrset.scm | 37 +++++++++++++++++++++++++ src/runtime/rgxcmp.scm | 61 ++++++++--------------------------------- src/runtime/runtime.pkg | 4 +-- 3 files changed, 50 insertions(+), 52 deletions(-) diff --git a/src/runtime/chrset.scm b/src/runtime/chrset.scm index 5a1a9242d..eb4934e64 100644 --- a/src/runtime/chrset.scm +++ b/src/runtime/chrset.scm @@ -523,6 +523,43 @@ USA. (define inversion-list-difference (make-inversion-list-combiner (lambda (a b) (and a (not b))))) +;;;; Char-Set Compiler + +;;; Special characters: +;;; #\] must appear as first character. +;;; #\- must appear as first or last character, or it may appear +;;; immediately after a range. +;;; #\^ must appear anywhere except as the first character in the set. + +(define (re-compile-char-set pattern negate?) + (receive (scalar-values negate?*) + (re-char-pattern->code-points pattern) + (let ((char-set (char-set* scalar-values))) + (if (if negate? (not negate?*) negate?*) + (char-set-invert char-set) + char-set)))) + +(define (re-char-pattern->code-points pattern) + (define (loop pattern scalar-values) + (if (pair? pattern) + (if (and (pair? (cdr pattern)) + (char=? (cadr pattern) #\-) + (pair? (cddr pattern))) + (loop (cdddr pattern) + (cons (cons (char->integer (car pattern)) + (fix:+ (char->integer (caddr pattern)) 1)) + scalar-values)) + (loop (cdr pattern) + (cons (char->integer (car pattern)) + scalar-values))) + scalar-values)) + + (let ((pattern (string->list pattern))) + (if (and (pair? pattern) + (char=? (car pattern) #\^)) + (values (loop (cdr pattern) '()) #t) + (values (loop pattern '()) #f)))) + ;;;; Miscellaneous character sets (define char-ctl?) diff --git a/src/runtime/rgxcmp.scm b/src/runtime/rgxcmp.scm index cd0a263dc..c4d542442 100644 --- a/src/runtime/rgxcmp.scm +++ b/src/runtime/rgxcmp.scm @@ -241,56 +241,6 @@ USA. string) (builder))) -;;;; Char-Set Compiler - -;;; Special characters: -;;; #\] must appear as first character. -;;; #\- must appear as first or last character, or it may appear -;;; immediately after a range. -;;; #\^ must appear anywhere except as the first character in the set. - -(define (re-compile-char-set pattern negate?) - (receive (scalar-values negate?*) - (re-char-pattern->code-points pattern) - (let ((char-set (char-set* scalar-values))) - (if (if negate? (not negate?*) negate?*) - (char-set-invert char-set) - char-set)))) - -(define (re-char-pattern->code-points pattern) - (define (loop pattern scalar-values) - (if (pair? pattern) - (if (and (pair? (cdr pattern)) - (char=? (cadr pattern) #\-) - (pair? (cddr pattern))) - (loop (cdddr pattern) - (cons (cons (char->integer (car pattern)) - (fix:+ (char->integer (caddr pattern)) 1)) - scalar-values)) - (loop (cdr pattern) - (cons (char->integer (car pattern)) - scalar-values))) - scalar-values)) - - (let ((pattern (string->list pattern))) - (if (and (pair? pattern) - (char=? (car pattern) #\^)) - (values (loop (cdr pattern) '()) #t) - (values (loop pattern '()) #f)))) - -;;;; Translation Tables - -(define re-translation-table - (let ((normal-table (make-bytevector #x100)) - (upcase-table (make-bytevector #x100))) - (do ((i 0 (fix:+ i 1))) - ((not (fix:< i #x100))) - (bytevector-u8-set! normal-table i i) - (bytevector-u8-set! upcase-table i - (char->integer (char-upcase (integer->char i))))) - (lambda (case-fold?) - (if case-fold? upcase-table normal-table)))) - ;;;; Pattern Compiler (define re-number-of-registers @@ -317,6 +267,17 @@ USA. (byte-stream #f read-only #t) (translation-table #f read-only #t)) +(define re-translation-table + (let ((normal-table (make-bytevector #x100)) + (upcase-table (make-bytevector #x100))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i #x100))) + (bytevector-u8-set! normal-table i i) + (bytevector-u8-set! upcase-table i + (char->integer (char-upcase (integer->char i))))) + (lambda (case-fold?) + (if case-fold? upcase-table normal-table)))) + (define (make-compiled-regexp bytes case-fold?) (%make-compiled-regexp bytes (re-translation-table case-fold?))) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index dfe7e87d0..90299f3bc 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1405,6 +1405,8 @@ USA. char-set* code-point-in-char-set? compute-char-set + re-char-pattern->code-points + re-compile-char-set string->char-set) (export (runtime regular-sexpression) normalize-ranges) @@ -5283,9 +5285,7 @@ USA. compiled-regexp/byte-stream compiled-regexp/translation-table condition-type:re-compile-pattern - re-char-pattern->code-points re-compile-char - re-compile-char-set re-compile-pattern re-compile-string re-disassemble-pattern -- 2.25.1